├── .gitignore ├── LICENSE ├── README.md ├── project.clj └── src └── deebn ├── core.clj ├── cv.clj ├── dbn.clj ├── dnn.clj ├── mnist.clj ├── protocols.clj ├── rbm.clj └── util.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | data 11 | models 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 
31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. 
As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 
103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 
133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. 
DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. 
However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 
215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deebn 2 | 3 | A Clojure library implementing a Deep Belief Network using Restricted 4 | Boltzmann Machines, based on [Geoffrey Hinton's work][work]. This 5 | library is the result of my thesis research into deep learning methods. 6 | 7 | ## "Installation" 8 | `deebn` is available for download or usage through your favorite dependency management tool from 9 | Clojars: 10 | 11 | [![Clojars Project](http://clojars.org/deebn/latest-version.svg)](http://clojars.org/deebn) 12 | 13 | ## Capabilities 14 | 15 | There are a few types of model that you can build and train, either 16 | for classification or as components of other models: 17 | 18 | - Restricted Boltzmann Machine 19 | - can be used as a component of a Deep Belief Network, or as a standalone discriminatory classifier 20 | Hyper-parameters: 21 | - learning rate 22 | - initial momentum 23 | - momentum (used after 'momentum-delay' epochs) 24 | - momentum-delay 25 | - batch-size 26 | - epochs 27 | - gap-delay (epochs to wait before testing for early stopping) 28 | - gap-stop-delay (consecutive positive energy gap epochs that initiate 29 | an early stop) 30 | - Deep Belief Network (composed of layers of RBMs) 31 | - Can be used to pre-train a Deep Neural Network, or as a discriminatory classifier 32 | (Note: a classification DBN is not fine-tuned - performance is satisfactory but not optimal) 33 | Hyper-parameters: 34 | - whether to use activations rather than samples from hidden layers when propagating 35 | to the next layer 36 | - Deep Neural Network 37 | - Initialized from a pre-trained DBN, with an additional logistic regression layer added 38 | - Network output is a softmax unit 39 | - Logistic regression unit is pre-trained with output from the DBN before moving to a 40 | full backprop training regimen 41 | Hyper-parameters: 42 | 
- batch-size 43 | - epochs 44 | - learning rate 45 | - lambda - L2 regularization (weight decay) parameter 46 | 47 | ## Usage 48 | 49 | The `core` namespace aims to offer examples of using the library. The 50 | `mnist` namespace offers examples for bringing in datasets (in this case 51 | the [MNIST][mnist] dataset). 52 | 53 | ## License 54 | 55 | Copyright © 2014 Chris Sims 56 | 57 | Distributed under the Eclipse Public License either version 1.0 or (at 58 | your option) any later version. 59 | 60 | [work]: http://www.cs.toronto.edu/~hinton/absps/montrealTR.pdf 61 | [mnist]: http://yann.lecun.com/exdb/mnist/ 62 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject deebn "0.1.1-SNAPSHOT" 2 | :description "Deep Belief Network using Restricted Boltzmann Machines" 3 | :url "https://jcsi.ms" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.6.0"] 7 | [org.clojure/tools.reader "0.8.15"] 8 | [org.clojure/data.csv "0.1.2"] 9 | [net.mikera/vectorz-clj "0.29.0"] 10 | [net.mikera/core.matrix "0.33.2"] 11 | [net.mikera/core.matrix.stats "0.5.0"] 12 | [com.taoensso/timbre "3.4.0"]] 13 | :profiles {:dev {:dependencies [[org.clojure/test.check "0.7.0"]]}} 14 | :jvm-opts ["-Xmx4g" "-Xms3g"] 15 | :deploy-repositories [["releases" :clojars]]) 16 | -------------------------------------------------------------------------------- /src/deebn/core.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.core 2 | (:require [deebn.dbn :refer [build-dbn build-classify-dbn]] 3 | [deebn.dnn :refer [dbn->dnn]] 4 | [deebn.mnist :refer [load-data-sans-label 5 | load-data-with-softmax load-data]] 6 | [deebn.protocols :refer [train-model test-model classify]] 7 | [deebn.rbm :refer [build-rbm build-jd-rbm]] 8 | 
[clojure.core.matrix :refer [set-current-implementation]])) 9 | 10 | (set-current-implementation :vectorz) 11 | 12 | (comment 13 | ;;; Choose a model and the corresponding dataset 14 | ;; A single Restricted Boltzmann Machine used for classification 15 | (def m (build-jd-rbm 784 500 10)) 16 | (def dataset (load-data-with-softmax "data/mnist_train.csv")) 17 | ;; A classification Deep Belief Network 18 | (def m (build-classify-dbn [784 500 500 2000] 10)) 19 | (def dataset (load-data-with-softmax "data/mnist_train.csv")) 20 | ;; A Deep Neural Network backed by a pre-trained Deep Belief Network 21 | (def m (build-dbn [784 500 500 250])) 22 | (def dataset (load-data-sans-label "data/mnist_train.csv")) 23 | 24 | ;;; Train the model 25 | (def m (train-model m dataset {:batch-size 100})) 26 | ;; For a DNN, the DBN is converted to a DNN before fine-tuning 27 | (def m (dbn->dnn m 10)) 28 | (def dataset (load-data "data/mnist_train.csv")) 29 | (def m (train-model m dataset {:batch-size 100 :epochs 10})) 30 | 31 | ;;; Test the model 32 | (def test-dataset (load-data "data/mnist_test.csv")) 33 | (test-model m test-dataset) 34 | 35 | ;;; Classify a single observation 36 | (def dv (first (load-data-sans-label "data/mnist_test.csv"))) 37 | (classify m dv) 38 | ) 39 | -------------------------------------------------------------------------------- /src/deebn/cv.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.cv 2 | (:require [clojure.core.matrix :as m] 3 | [clojure.core.matrix.select :as s] 4 | [deebn.protocols :refer [train-model test-model]] 5 | [deebn.dnn :refer [dbn->dnn]])) 6 | 7 | (defn select-folds 8 | "Returns a vector of vectors specifying holdout observations to form 9 | k folds in a dataset." 
10 | [dataset k] 11 | (let [rows (vec (range (m/row-count dataset))) 12 | part-size (int (Math/ceil (/ (count rows) k)))] 13 | (vec (partition part-size part-size nil (shuffle rows))))) 14 | 15 | (defn k-fold-cross-validation 16 | "Perform k-fold cross-validation on a training model. A single data 17 | set is provided, and a vector of error percentages is 18 | returned. Since training data and test data are in potentially 19 | different formats, pass both in for use. These should be two formats 20 | of the same data set to use for validation." 21 | [model train-data test-data params k] 22 | (let [holdouts (select-folds train-data k)] 23 | (mapv (fn [holdouts] 24 | (let [train-folds (s/sel train-data (s/exclude holdouts) (s/irange)) 25 | test-fold (s/sel test-data holdouts (s/irange)) 26 | m (train-model model train-folds params)] 27 | (test-model m test-fold))) 28 | holdouts))) 29 | 30 | (defn k-fold-cross-validation-dnn 31 | "Perform k-fold cross-validation on a training model. A single data 32 | set is provided, and a vector of error percentages is 33 | returned. Since training data and test data are in potentially 34 | different formats, pass both in for use. These should be two formats 35 | of the same data set to use for validation. DNNs require two 36 | training steps, and so require a slightly different validation 37 | approach." 
38 | [model train-data test-data params k classes] 39 | (let [holdouts (select-folds train-data k)] 40 | (mapv (fn [holdouts] 41 | (let [train-folds (m/matrix (s/sel train-data 42 | (s/exclude holdouts) (s/irange))) 43 | test-fold (m/matrix (s/sel test-data holdouts (s/irange))) 44 | m (train-model model train-folds params) 45 | m (dbn->dnn m classes) 46 | train-folds (m/matrix (s/sel test-data (s/exclude holdouts) 47 | (s/irange))) 48 | m (train-model m train-folds params)] 49 | (test-model m test-fold))) 50 | holdouts))) 51 | -------------------------------------------------------------------------------- /src/deebn/dbn.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.dbn 2 | (:require [deebn.protocols :as p] 3 | [deebn.rbm :refer [build-rbm build-jd-rbm edn->CRBM edn->RBM]] 4 | [deebn.util :refer [query-hidden]] 5 | [clojure.core.matrix :as m] 6 | [clojure.tools.reader.edn :as edn] 7 | [clojure.core.matrix.select :as s])) 8 | 9 | (defrecord DBN [rbms layers]) 10 | (defrecord CDBN [rbms layers classes]) 11 | 12 | (m/set-current-implementation :vectorz) 13 | 14 | (defn build-dbn 15 | "Build a Deep Belief Network composed of Restricted Boltzmann Machines. 16 | 17 | layers is a vector of nodes in each layer, starting with the visible 18 | layer. 19 | 20 | Ex: [784 500 500 2000] -> 784-500 RBM, a 500-500 RBM, and a 500-2000 21 | RBM" 22 | [layers] 23 | (let [rbms (mapv #(build-rbm %1 %2) (butlast layers) (rest layers))] 24 | (->DBN rbms layers))) 25 | 26 | (defn build-classify-dbn 27 | "Build a Deep Belief Network using Restricted Boltzmann Machines 28 | designed to classify an observation. 29 | 30 | See `build-dbn` for layers usage. classes is the number of possible 31 | classes the observation could be." 
32 | [layers classes] 33 | (let [base (build-dbn (butlast layers)) 34 | associative (build-jd-rbm (last (butlast layers)) (last layers) classes)] 35 | (map->CDBN {:rbms (conj (:rbms base) associative) 36 | :layers layers 37 | :classes classes}))) 38 | 39 | (defn train-dbn 40 | "Train a generative Deep Belief Network on a dataset. This trained 41 | model doesn't have an inherent value, unless the trained weights are 42 | subsequently used to initialize another network, e.g. a simple 43 | feedforward neural network. 44 | 45 | dataset is an unlabeled dataset used for unsupervised training. 46 | 47 | mean-field? is a key in the params map, and is a boolean indicating 48 | whether to use the expected value from a hidden layer as the input 49 | to the next RBM in the network, or use the sampled binary 50 | value. Defaults to true. 51 | 52 | query-final? is a key in the params map, and is used to determine if 53 | the final RBm trained is queried for the state of its hidden 54 | layer. This is only used when training the generative layers of a 55 | classification DBN, and changes the return type (both the trained 56 | DBN and the final transformed dataset are returned if this is true). 57 | 58 | See `train-rbm` for details on hyper-parameters passed in the param map." 59 | [dbn dataset params] 60 | (let [{:keys [mean-field? query-final?] 61 | :or {mean-field? true query-final? false}} params 62 | ;; Train the first RBM 63 | rbms (assoc (:rbms dbn) 0 64 | (p/train-model (first (:rbms dbn)) dataset params))] 65 | (loop [rbms rbms 66 | iter 1 67 | data (query-hidden (first rbms) dataset mean-field?)] 68 | (if (>= iter (count rbms)) 69 | (if query-final? 70 | {:dbn (assoc dbn :rbms rbms) :data data} 71 | (assoc dbn :rbms rbms)) 72 | (let [new-rbm (p/train-model (get rbms iter) data params)] 73 | (recur (assoc rbms iter new-rbm) 74 | (inc iter) 75 | ;; Shortcut to prevent a final, unnecessary calculation 76 | (when (or (< (inc iter) (count rbms)) query-final?) 
77 | (query-hidden new-rbm data mean-field?)))))))) 78 | 79 | (defn train-classify-dbn 80 | "Train a Deep Belief Network designed to classify data vectors. 81 | 82 | dataset is a softmax-labeled dataset, in the same format as that 83 | produced by deebn.mnist/load-data-with-softmax (the softmax precedes 84 | the data vector). 85 | 86 | Check train-rbm and train-dbn for more information about 87 | parameters." 88 | [dbn dataset params] 89 | (let [{:keys [mean-field?] :or {mean-field? true}} params 90 | softmaxes (m/matrix (s/sel dataset (s/irange) 91 | (range 0 (:classes dbn)))) 92 | {gen-dbn :dbn xform-data :data} 93 | (train-dbn 94 | (assoc dbn :rbms 95 | (vec (butlast (:rbms dbn)))) 96 | (m/matrix (s/sel dataset (s/irange) 97 | (range (:classes dbn) (m/column-count dataset)))) 98 | (assoc params :query-final? true))] 99 | (assoc dbn :rbms 100 | (conj (:rbms gen-dbn) 101 | (p/train-model (last (:rbms dbn)) 102 | (m/join-along 1 softmaxes xform-data) 103 | params))))) 104 | 105 | (extend-protocol p/Trainable 106 | DBN 107 | (train-model [m dataset params] 108 | (train-dbn m dataset params))) 109 | 110 | (extend-protocol p/Trainable 111 | CDBN 112 | (train-model [m dataset params] 113 | (train-classify-dbn m dataset params))) 114 | 115 | 116 | ;;;=========================================================================== 117 | ;;; Testing a DBN trained on a data set 118 | ;;;=========================================================================== 119 | 120 | (defn classify-obv 121 | "Given a DBN and a single observation, return the model's prediction." 122 | [dbn obv] 123 | (let [prop-data (reduce #(query-hidden %2 %1 true) 124 | obv 125 | (butlast (:rbms dbn)))] 126 | (p/classify (last (:rbms dbn)) prop-data))) 127 | 128 | (extend-protocol p/Classify 129 | CDBN 130 | (classify [m obv] 131 | (classify-obv m obv))) 132 | 133 | 134 | (defn test-dbn 135 | "Test a classification Deep Belief Network on a given dataset. 
136 | 137 | The dataset should have the label as the last entry in each 138 | observation." 139 | [dbn dataset] 140 | (let [columns (m/column-count dataset) 141 | labels (m/matrix (mapv vector (s/sel dataset (s/irange) (s/end dataset 1)))) 142 | ;; Propagate the dataset up through the lower layers of the DBN 143 | prop-data (reduce #(query-hidden %2 %1 true) 144 | (m/matrix (s/sel dataset 145 | (s/irange) 146 | (range 0 (dec columns)))) 147 | (butlast (:rbms dbn)))] 148 | (p/test-model (last (:rbms dbn)) (m/join-along 1 prop-data labels)))) 149 | 150 | (extend-protocol p/Testable 151 | CDBN 152 | (test-model [m dataset] 153 | (test-dbn m dataset))) 154 | 155 | 156 | ;;;=========================================================================== 157 | ;;; Utility functions for a DBN 158 | ;;;=========================================================================== 159 | 160 | (defn save-dbn 161 | "Save a DBN." 162 | [dbn filepath] 163 | (spit filepath (pr-str dbn))) 164 | 165 | (defn load-dbn 166 | "Load a DBN from disk." 
167 | [filepath] 168 | (edn/read-string {:readers {'deebn.rbm.RBM edn->RBM 169 | 'deebn.rbm.CRBM edn->CRBM 170 | 'deebn.dbn.DBN map->DBN 171 | 'deebn.dbn.CDBN map->CDBN}} (slurp filepath))) 172 | -------------------------------------------------------------------------------- /src/deebn/dnn.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.dnn 2 | (:refer-clojure :exclude [+ * -]) 3 | (:require [deebn.protocols :as p] 4 | [deebn.util :refer [sigmoid gen-softmax]] 5 | [clojure.core.matrix :as m] 6 | [clojure.core.matrix.operators :refer [+ * -]] 7 | [clojure.core.matrix.random :as rand] 8 | [clojure.core.matrix.select :as s] 9 | [clojure.tools.reader.edn :as edn]) 10 | (:import java.io.Writer)) 11 | 12 | (m/set-current-implementation :vectorz) 13 | 14 | (defrecord DNN [weights biases layers classes]) 15 | 16 | (defn dbn->dnn 17 | "Given a pretrained Deep Belief Network, use the trained weights and 18 | biases to build a Deep Neural Network." 19 | [dbn classes] 20 | (let [top-layer {:w (m/matrix (repeatedly 21 | (last (:layers dbn)) 22 | #(rand/sample-normal classes))) 23 | :bias (m/zero-vector classes)}] 24 | (map->DNN {:classes classes 25 | :weights (conj (mapv :w (:rbms dbn)) (:w top-layer)) 26 | :biases (conj (mapv :hbias (:rbms dbn)) (:bias top-layer)) 27 | :layers (:layers dbn)}))) 28 | 29 | (defn prop-up 30 | "Given an input matrix, weight matrix, and bias vector, propagate 31 | the signal through the layer." 32 | [input weights bias] 33 | (m/emap sigmoid (+ bias (m/mmul input weights)))) 34 | 35 | (defn feed-forward 36 | "Given an initial input batch and a DNN, feed the batch through the 37 | net, retaining the output of each layer." 38 | [batch dnn] 39 | (reductions #(prop-up %1 (first %2) (second %2)) 40 | batch 41 | (map #(vector %1 %2) (:weights dnn) (:biases dnn)))) 42 | 43 | (defn net-output 44 | "Propagate an input matrix through the network." 
45 | [net input] 46 | (m/matrix (reduce #(prop-up %1 (first %2) (second %2)) 47 | input 48 | (mapv #(vector %1 %2) (:weights net) (:biases net))))) 49 | 50 | (defn layer-error 51 | "Calculate the error for a particular layer in a net, given the 52 | weights for the next layer, the error for the next layer, and the 53 | output for the current layer." 54 | [weights next-error output] 55 | (* (m/mmul next-error (m/transpose weights)) (* output (- 1 output)))) 56 | 57 | (defn update-layer 58 | "Update the weights and biases of a layer, given the previous 59 | weights and biases, input coming into the weights, the error for the 60 | layer, the learning rate, and the batch size." 61 | [weights biases input error learning-rate lambda batch-size observations] 62 | (let [weights (- (* weights (- 1 (/ (* learning-rate lambda) observations))) 63 | (* (/ learning-rate batch-size) 64 | (reduce + (mapv m/outer-product 65 | (m/rows input) 66 | (m/rows error))))) 67 | biases (- biases (* (/ learning-rate batch-size) 68 | (reduce + (m/rows error))))] 69 | [weights biases])) 70 | 71 | (defn train-batch 72 | "Given a batch of training data and a DNN, update the weights and 73 | biases accordingly." 
74 | [batch dnn observations learning-rate lambda] 75 | (let [data (m/matrix (s/sel batch 76 | (s/irange) 77 | (range 0 (dec (m/column-count batch))))) 78 | targets (m/matrix (mapv #(gen-softmax %1 (:classes dnn)) 79 | (s/sel batch (s/irange) s/end))) 80 | data (feed-forward data dnn) 81 | errors (m/emap #(- %1 %2) 82 | (last data) targets) 83 | errors (reverse (reductions #(layer-error (first %2) %1 (second %2)) 84 | errors 85 | (map #(vector %1 %2) 86 | (reverse (rest (:weights dnn))) 87 | (reverse (butlast (rest data)))))) 88 | updated (mapv #(update-layer %1 %2 %3 %4 89 | learning-rate 90 | lambda 91 | (m/row-count batch) 92 | observations) 93 | (:weights dnn) 94 | (:biases dnn) 95 | (butlast data) 96 | errors)] 97 | (assoc dnn :weights (mapv first updated) :biases (mapv second updated)))) 98 | 99 | (defn train-epoch 100 | "Given a training dataset and a net, train it for one epoch (one 101 | pass over the dataset)." 102 | [net dataset observations learning-rate lambda batch-size] 103 | (loop [net net 104 | batch (m/matrix (s/sel dataset (range 0 batch-size) (s/irange))) 105 | batch-num 0] 106 | (let [start (* batch-num batch-size) 107 | end (min (* (inc batch-num) batch-size) (m/row-count dataset))] 108 | (if (>= start (m/row-count dataset)) 109 | net 110 | (do 111 | (recur (train-batch batch net observations learning-rate lambda) 112 | (m/matrix (s/sel dataset (range start end) (s/irange))) 113 | (inc batch-num))))))) 114 | 115 | (defn train-top-layer 116 | "Pre-train the top logistic regression layer before moving to fine-tuning." 
117 | [dnn dataset observations batch-size epochs learning-rate lambda] 118 | (println "Propagating dataset through DNN...") 119 | (let [top-layer {:weights (vector (last (:weights dnn))) 120 | :biases (vector (last (:biases dnn))) 121 | :classes (:classes dnn)} 122 | output (net-output 123 | (assoc dnn 124 | :weights (butlast (:weights dnn)) 125 | :biases (butlast (:biases dnn))) 126 | (m/matrix 127 | (s/sel dataset 128 | (s/irange) 129 | (range 0 (dec (m/column-count dataset)))))) 130 | targets (m/reshape (m/matrix (s/sel dataset (s/irange) s/end)) 131 | [(m/row-count dataset) 1]) 132 | dataset (m/matrix (m/join-along 1 output targets))] 133 | (println "Pre-training logistic regression layer, epoch 1") 134 | (loop [epoch 2 135 | top-layer (train-epoch top-layer dataset observations 136 | learning-rate lambda batch-size)] 137 | (if (> epoch epochs) 138 | (assoc dnn 139 | :weights (assoc (:weights dnn) (dec (count (:weights dnn))) 140 | (first (:weights top-layer))) 141 | :biases (assoc (:biases dnn) (dec (count (:biases dnn))) 142 | (first (:biases top-layer)))) 143 | (do 144 | (println "Pre-training logistic regression layer, epoch" epoch) 145 | (recur (inc epoch) 146 | (train-epoch top-layer dataset observations 147 | learning-rate lambda batch-size))))))) 148 | 149 | (defn train-dnn 150 | "Given a labeled dataset, train a DNN. 151 | 152 | The dataset should have the label as the last element of each input 153 | vector. 
154 | 155 | params is a map that may have the following keys: 156 | batch-size: default 100 157 | epochs: default 100 158 | learning-rate: default 0.5 159 | lambda: default 0.1 " 160 | [dnn dataset params] 161 | (let [{:keys [batch-size epochs learning-rate lambda] 162 | :or {batch-size 100 163 | epochs 100 164 | learning-rate 0.5 165 | lambda 0.1}} params 166 | observations (m/row-count dataset) 167 | net (train-top-layer dnn dataset observations batch-size 168 | epochs learning-rate lambda)] 169 | (println "Training epoch 1") 170 | (loop [epoch 2 171 | net (train-epoch net dataset observations 172 | learning-rate lambda batch-size)] 173 | (if (> epoch epochs) 174 | net 175 | (do 176 | (println "\nTraining epoch" epoch) 177 | (recur (inc epoch) 178 | (train-epoch net dataset observations learning-rate 179 | lambda batch-size))))))) 180 | 181 | (extend-protocol p/Trainable 182 | DNN 183 | (train-model [m dataset params] 184 | (train-dnn m dataset params))) 185 | 186 | 187 | ;;;=========================================================================== 188 | ;;; Testing a DNN trained on a data set 189 | ;;;=========================================================================== 190 | 191 | (defn softmax->class 192 | "Get the predicted class from a softmax output." 193 | [x] 194 | (let [largest (m/emax x) 195 | indexed (zipmap x (range (m/row-count x)))] 196 | (get indexed largest))) 197 | 198 | (defn classify-obv 199 | "Given a DNN and a single observation, return the model's prediction." 200 | [dnn obv] 201 | (softmax->class (net-output dnn obv))) 202 | 203 | (extend-protocol p/Classify 204 | DNN 205 | (classify [m obv] 206 | (classify-obv m obv))) 207 | 208 | (defn test-dnn 209 | "Test a Deep Neural Network on a dataset. Returns an error percentage. 210 | 211 | dataset should have the label as the last entry in each observation." 
212 | [dnn dataset] 213 | (let [num-observations (m/row-count dataset) 214 | predictions (mapv #(softmax->class (net-output dnn %1)) 215 | (m/matrix (s/sel 216 | dataset 217 | (s/irange) 218 | (range 0 (dec (m/column-count dataset)))))) 219 | errors (mapv #(if (== (last %1) %2) 0 1) dataset predictions)] 220 | (double (/ (m/esum errors) num-observations)))) 221 | 222 | (extend-protocol p/Testable 223 | DNN 224 | (test-model [m dataset] 225 | (test-dnn m dataset))) 226 | 227 | ;;;=========================================================================== 228 | ;;; Utility functions for a DNN 229 | ;;;=========================================================================== 230 | 231 | (defmethod clojure.core/print-method DNN print-DNN [dnn ^Writer w] 232 | (.write w (str "#deebn.dnn.DNN {" 233 | " :weights " (mapv m/to-nested-vectors (:weights dnn)) 234 | " :biases " (mapv m/to-nested-vectors (:biases dnn)) 235 | " :layers " (:layers dnn) 236 | " :classes " (:classes dnn) 237 | " }"))) 238 | 239 | (defn save-dnn 240 | "Save a DNN to disk." 241 | [dnn filepath] 242 | (spit filepath (pr-str dnn))) 243 | 244 | (defn edn->DNN 245 | "The default map->DNN function provided by the defrecord doesn't 246 | provide us with the performant implementation (i.e. matrices and 247 | arrays from core.matrix), so this function adds a small step to 248 | ensure that." 249 | [data] 250 | (->DNN (mapv m/matrix (:weights data)) 251 | (mapv m/matrix (:biases data)) 252 | (:layers data) 253 | (:classes data))) 254 | 255 | (defn load-dnn 256 | "Load a DNN from disk." 
[filepath]
  (edn/read-string {:readers {'deebn.dnn.DNN edn->DNN}} (slurp filepath)))

--------------------------------------------------------------------------------
/src/deebn/mnist.clj:
--------------------------------------------------------------------------------
(ns deebn.mnist
  (:require [clojure.core.matrix :as matrix]
            [clojure.core.matrix.select :as select]
            [clojure.data.csv :as csv]
            [clojure.edn :as edn]
            [clojure.java.io :as io]
            [deebn.util :refer [softmax-from-obv]]))

(matrix/set-current-implementation :vectorz)

(defn scale-data
  "Scale the input parameters to [0-1]. This assumes that the label is
  the first element. After scaling, the label is the last element."
  [x]
  (conj (mapv #(/ % 255.0) (rest x)) (first x)))

(defn load-data
  "Load a MNIST CSV data set.

  SECURITY FIX: fields were previously parsed with
  clojure.core/read-string, which honors reader macros such as #= and
  can therefore execute arbitrary code embedded in an untrusted CSV
  file. clojure.edn/read-string parses the same numeric literals
  without any evaluation."
  [filepath]
  (let [data (with-open [in-file (io/reader filepath)]
               (->> in-file
                    (csv/read-csv)
                    (matrix/emap edn/read-string)
                    (mapv scale-data)))]
    (matrix/matrix data)))

(defn load-data-sans-label
  "Load a MNIST CSV data set without the label.

  Generalized: the feature count is derived from the data (label is
  the last column after scale-data) instead of the hard-coded 784,
  which is what this computed for MNIST anyway."
  [filepath]
  (let [data (load-data filepath)]
    (matrix/matrix (select/sel data (select/irange)
                               (range 0 (dec (matrix/column-count data)))))))

(defn load-data-with-softmax
  "Load a dataset with the class label expanded to a softmax-appropriate form.
  Example: In the MNIST dataset, class '7' expands to -> '0 0 0 0 0 0 0 1 0 0"
  [filepath]
  (let [data (load-data filepath)
        data (mapv #(softmax-from-obv % 10) (matrix/rows data))]
    (matrix/matrix data)))
--------------------------------------------------------------------------------
/src/deebn/protocols.clj:
--------------------------------------------------------------------------------
(ns deebn.protocols)

(defprotocol Trainable
  "Protocol for models that are trainable with a dataset."
5 | (train-model [m dataset params] 6 | "Train a model, given a dataset and relevant 7 | hyper-parameters. Refer to individual training functions for 8 | hyper-parameter details.")) 9 | 10 | (defprotocol Testable 11 | "Protocol for models that are testable." 12 | (test-model [m dataset] 13 | "Test a trained model given a dataset. The dataset may need to be 14 | in a different format from that used to train.")) 15 | 16 | (defprotocol Classify 17 | "Protocol for models that can classify a given observation." 18 | (classify [m obv] 19 | "Use the given model to classify a single observation.")) 20 | -------------------------------------------------------------------------------- /src/deebn/rbm.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.rbm 2 | (:refer-clojure :exclude [+ - * / ==]) 3 | (:require [deebn.protocols :refer [Testable Trainable Classify]] 4 | [deebn.util :refer [bernoulli gen-softmax 5 | get-min-position sigmoid]] 6 | [clojure.core.matrix :as m] 7 | [clojure.core.matrix.operators :refer [+ - * / ==]] 8 | [clojure.core.matrix.select :as s] 9 | [clojure.core.matrix.random :as rand] 10 | [clojure.core.matrix.stats :as stats] 11 | [clojure.set :refer [difference]] 12 | [clojure.tools.reader.edn :as edn]) 13 | (:import java.io.Writer)) 14 | 15 | (m/set-current-implementation :vectorz) 16 | 17 | ;;;=========================================================================== 18 | ;;; Generate Restricted Boltzmann Machines 19 | ;;; ========================================================================== 20 | ;; We define a purely generative RBM, trained without any class 21 | ;; labels, and a classification RBM 22 | (defrecord RBM [w vbias hbias w-vel vbias-vel hbias-vel visible hidden]) 23 | (defrecord CRBM [w vbias hbias w-vel vbias-vel hbias-vel visible hidden classes]) 24 | 25 | (defn build-rbm 26 | "Factory function to produce an RBM record." 
27 | [visible hidden] 28 | (let [w (m/matrix (repeatedly visible #(/ (rand/sample-normal hidden) 100))) 29 | w-vel (m/zero-matrix visible hidden) 30 | ;; TODO: The visual biases should really be set to 31 | ;; log(p_i/ (1 - p_i)), where p_i is the proportion of 32 | ;; training vectors in which unit i is turned on. 33 | vbias (m/zero-vector visible) 34 | hbias (m/array (repeat hidden -4)) 35 | vbias-vel (m/zero-vector visible) 36 | hbias-vel (m/zero-vector hidden)] 37 | (->RBM w vbias hbias w-vel vbias-vel hbias-vel visible hidden))) 38 | 39 | (defn build-jd-rbm 40 | "Factory function to build a joint density RBM for testing purposes. 41 | 42 | This RBM has two sets of visible units - the typical set 43 | representing each observation in the data set, and a softmax unit 44 | representing the label for each observation. These are combined, and 45 | the label becaomes part of the input vector." 46 | [visible hidden classes] 47 | (let [rbm (build-rbm (+ visible classes) hidden)] 48 | (map->CRBM (assoc rbm :classes classes)))) 49 | 50 | 51 | ;;;=========================================================================== 52 | ;;; Train an RBM 53 | ;;; ========================================================================== 54 | 55 | 56 | (defn update-weights 57 | "Determine the weight gradient from this batch" 58 | [ph ph2 batch pv] 59 | (reduce + (map #(- (m/outer-product %1 %2) (m/outer-product %3 %4)) 60 | (m/rows batch) (m/rows ph) 61 | (m/rows pv) (m/rows ph2)))) 62 | 63 | ;; TODO: Implement CD-K - currently CD-1 is hard-coded. 
(defn update-rbm
  "Single batch (CD-1) step update of RBM parameters: positive phase
  probabilities ph, one Gibbs step to reconstruction v, negative
  phase probabilities ph2, then momentum-smoothed gradient ascent on
  weights and both bias vectors."
  [batch rbm learning-rate momentum]
  (let [batch-size (m/row-count batch)
        ph (m/emap sigmoid (+ (:hbias rbm) (m/mmul batch (:w rbm))))
        h (m/emap bernoulli ph)
        pv (m/emap sigmoid (+ (:vbias rbm)
                              (m/mmul h (m/transpose (:w rbm)))))
        v (m/emap bernoulli pv)
        ph2 (m/emap sigmoid (+ (:hbias rbm) (m/mmul v (:w rbm))))
        delta-w (/ (update-weights ph ph2 batch pv) batch-size)
        delta-vbias (/ (reduce + (map #(- % %2)
                                      (m/rows batch)
                                      (m/rows pv)))
                       batch-size)
        delta-hbias (/ (reduce + (map #(- % %2)
                                      (m/rows h)
                                      (m/rows ph2)))
                       batch-size)
        w-vel (+ (* momentum (:w-vel rbm)) (* learning-rate delta-w))
        vbias-vel (+ (* momentum (:vbias-vel rbm))
                     (* learning-rate delta-vbias))
        hbias-vel (+ (* momentum (:hbias-vel rbm))
                     (* learning-rate delta-hbias))]
    (assoc rbm
           :w (+ (:w rbm) w-vel)
           :vbias (+ (:vbias rbm) vbias-vel)
           :hbias (+ (:hbias rbm) hbias-vel)
           :w-vel w-vel :vbias-vel vbias-vel :hbias-vel hbias-vel)))

(defn train-epoch
  "Train a single epoch.

  BUG FIX: the previous loop recurred with the slice computed from
  the *current* batch number while training the slice bound on the
  previous iteration, so the first mini-batch was trained twice and
  the final mini-batch was never trained; the initial slice also
  overran datasets smaller than one batch. Each iteration now
  selects and trains its own slice."
  [rbm dataset learning-rate momentum batch-size]
  (let [num-rows (m/row-count dataset)]
    (loop [rbm rbm
           batch-num 1]
      (let [start (* (dec batch-num) batch-size)
            end (min (* batch-num batch-size) num-rows)]
        (if (>= start num-rows)
          rbm
          (do
            (print ".")
            (flush)
            (recur (update-rbm (m/matrix (s/sel dataset (range start end)
                                                (s/irange)))
                               rbm learning-rate momentum)
                   (inc batch-num))))))))

(defn select-overfitting-sets
  "Given a dataset, attempt to choose reasonable validation and test
  sets to monitor overfitting."
114 | [dataset] 115 | (let [obvs (m/row-count dataset) 116 | validation-indices (set (repeatedly (/ obvs 100) #(rand-int obvs))) 117 | validations (m/matrix (s/sel dataset 118 | (vec validation-indices) (s/irange))) 119 | train-indices (difference 120 | (set (repeatedly (/ obvs 100) 121 | #(rand-int obvs))) validation-indices) 122 | train-sample (m/matrix (s/sel dataset (vec train-indices) (s/irange)))] 123 | {:validations validations 124 | :train-sample train-sample 125 | :dataset (s/sel dataset (s/exclude (vec validation-indices)) 126 | (s/irange))})) 127 | 128 | (defn free-energy 129 | "Compute the free energy of a given visible vector and RBM. Lower is 130 | better." 131 | [x rbm] 132 | (let [hidden-input (+ (:hbias rbm) (m/mmul x (:w rbm)))] 133 | (- (- (m/mmul x (:vbias rbm))) 134 | (reduce + (mapv #(Math/log (+ 1 (Math/exp %))) hidden-input))))) 135 | 136 | (defn check-overfitting 137 | "Given an rbm, a sample from the training set, and a validation set, 138 | determine if the model is starting to overfit the data. This is 139 | measured by a difference in the average free energy over the 140 | training set sample and the validation set." 141 | [rbm train-sample validations] 142 | (let [avg-train-energy (stats/mean (pmap #(free-energy %1 rbm) 143 | (m/rows train-sample))) 144 | avg-validation-energy (stats/mean (pmap #(free-energy %1 rbm) 145 | (m/rows validations)))] 146 | (Math/abs ^Double (- avg-train-energy avg-validation-energy)))) 147 | 148 | (defn train-rbm 149 | "Given a training set, train an RBM 150 | 151 | params is a map with various options: 152 | learning-rate: defaults to 0.1 153 | initial-momentum: starting momentum. Defaults to 0.5 154 | momentum: momentum after `momentum-delay` epochs have passed. Defaults to 0.9 155 | momentum-delay: epochs after which `momentum` is used instead of 156 | `initial-momentum`. Defaults to 3 157 | batch-size: size of each mini-batch. 
Defaults to 10 158 | epochs: number of times to train the model over the entire training set. 159 | Defaults to 100 160 | gap-delay: number of epochs elapsed before early stopping is considered 161 | gap-stop-delay: number of sequential epochs where energy gap is increasing 162 | before stopping" 163 | [rbm dataset params] 164 | (let [{:keys [validations train-sample dataset]} 165 | (select-overfitting-sets dataset) 166 | {:keys [learning-rate initial-momentum momentum momentum-delay 167 | batch-size epochs gap-delay gap-stop-delay] 168 | :or {learning-rate 0.1 169 | initial-momentum 0.5 170 | momentum 0.9 171 | momentum-delay 3 172 | batch-size 10 173 | epochs 100 174 | gap-delay 10 175 | gap-stop-delay 2}} params] 176 | (println "Training epoch 1") 177 | (loop [rbm (train-epoch rbm dataset learning-rate 178 | initial-momentum batch-size) 179 | epoch 2 180 | energy-gap (check-overfitting rbm train-sample validations) 181 | gap-inc-count 0] 182 | (if (> epoch epochs) 183 | rbm 184 | (do (println "\nTraining epoch" epoch) 185 | (let [curr-momentum (if (> epoch momentum-delay) 186 | momentum initial-momentum) 187 | rbm (train-epoch rbm dataset learning-rate 188 | curr-momentum batch-size) 189 | gap-after-train (check-overfitting rbm train-sample 190 | validations) 191 | _ (println "\nGap pre-train:" energy-gap 192 | "After train:" gap-after-train)] 193 | (if (and (>= epoch gap-delay) 194 | (neg? (- energy-gap gap-after-train)) 195 | (>= gap-inc-count gap-stop-delay)) 196 | rbm 197 | (recur rbm 198 | (inc epoch) 199 | gap-after-train 200 | (if (neg? 
(- energy-gap gap-after-train)) 201 | (inc gap-inc-count) 202 | 0))))))))) 203 | 204 | (extend-protocol Trainable 205 | CRBM 206 | (train-model [m dataset params] 207 | (train-rbm m dataset params)) 208 | RBM 209 | (train-model [m dataset params] 210 | (train-rbm m dataset params))) 211 | 212 | 213 | ;;;=========================================================================== 214 | ;;; Testing an RBM trained on a data set 215 | ;;;=========================================================================== 216 | 217 | (defn get-prediction 218 | "For a given observation and RBM, return the predicted class." 219 | [x rbm num-classes labeled?] 220 | (let [softmax-cases (mapv #(gen-softmax % num-classes) (range num-classes)) 221 | trials (m/matrix (mapv #(m/join % %2) softmax-cases 222 | (if labeled? 223 | (repeat (butlast x)) 224 | (repeat x)))) 225 | results (mapv #(free-energy % rbm) trials)] 226 | (get-min-position results))) 227 | 228 | (extend-protocol Classify 229 | CRBM 230 | (classify [m obv] 231 | (get-prediction obv m (:classes m) false))) 232 | 233 | (defn test-rbm 234 | "Test a joint density RBM trained on a data set. Returns an error 235 | percentage. 236 | 237 | dataset should have the label as the last entry in each 238 | observation." 
239 | [rbm dataset num-classes] 240 | (let [num-observations (m/row-count dataset) 241 | predictions (pmap #(get-prediction % rbm num-classes true) dataset) 242 | errors (mapv #(if (== (last %) %2) 0 1) dataset predictions) 243 | total (m/esum errors)] 244 | (double (/ total num-observations)))) 245 | 246 | (extend-protocol Testable 247 | CRBM 248 | (test-model [m dataset] 249 | (test-rbm m dataset (:classes m)))) 250 | 251 | 252 | ;;;=========================================================================== 253 | ;;; Utility functions for an RBM 254 | ;;;=========================================================================== 255 | 256 | ;; This is designed for EDN printing, not actually visualizing the RBM 257 | ;; at the REPL (this is only needed because similar methods are not 258 | ;; defined for clojure.core.matrix implementations) 259 | (defmethod clojure.core/print-method RBM print-RBM [rbm ^Writer w] 260 | (.write w (str "#deebn.rbm.RBM {" 261 | " :w " (m/to-nested-vectors (:w rbm)) 262 | " :vbias " (m/to-nested-vectors (:vbias rbm)) 263 | " :hbias " (m/to-nested-vectors (:hbias rbm)) 264 | " :w-vel " (m/to-nested-vectors (:w-vel rbm)) 265 | " :vbias-vel " (m/to-nested-vectors (:vbias-vel rbm)) 266 | " :hbias-vel " (m/to-nested-vectors (:hbias-vel rbm)) 267 | " :visible " (:visible rbm) 268 | " :hidden " (:hidden rbm) 269 | " }"))) 270 | 271 | (defmethod clojure.core/print-method CRBM print-CRBM [rbm ^Writer w] 272 | (.write w (str "#deebn.rbm.CRBM {" 273 | " :w " (m/to-nested-vectors (:w rbm)) 274 | " :vbias " (m/to-nested-vectors (:vbias rbm)) 275 | " :hbias " (m/to-nested-vectors (:hbias rbm)) 276 | " :w-vel " (m/to-nested-vectors (:w-vel rbm)) 277 | " :vbias-vel " (m/to-nested-vectors (:vbias-vel rbm)) 278 | " :hbias-vel " (m/to-nested-vectors (:hbias-vel rbm)) 279 | " :visible " (:visible rbm) 280 | " :hidden " (:hidden rbm) 281 | " :classes " (:classes rbm) 282 | " }"))) 283 | 284 | (defn save-rbm 285 | "Save a RBM to disk." 
286 | [rbm filepath] 287 | (spit filepath (pr-str rbm))) 288 | 289 | (defn edn->RBM 290 | "The default map->RBM function provided by the defrecord doesn't 291 | provide us with the performant implementation (i.e. matrices and 292 | arrays from core.matrix), so this function adds a small step to 293 | ensure that." 294 | [data] 295 | (->RBM (m/matrix (:w data)) 296 | (m/matrix (:vbias data)) 297 | (m/matrix (:hbias data)) 298 | (m/matrix (:w-vel data)) 299 | (m/matrix (:vbias-vel data)) 300 | (m/matrix (:hbias-vel data)) 301 | (:visible data) 302 | (:hidden data))) 303 | 304 | (defn edn->CRBM 305 | "The default map->RBM function provided by the defrecord doesn't 306 | provide us with the performant implementation (i.e. matrices and 307 | arrays from core.matrix), so this function adds a small step to 308 | ensure that." 309 | [data] 310 | (->CRBM (m/matrix (:w data)) 311 | (m/matrix (:vbias data)) 312 | (m/matrix (:hbias data)) 313 | (m/matrix (:w-vel data)) 314 | (m/matrix (:vbias-vel data)) 315 | (m/matrix (:hbias-vel data)) 316 | (:visible data) 317 | (:hidden data) 318 | (:classes data))) 319 | 320 | (defn load-rbm 321 | "Load a RBM from disk." 322 | [filepath] 323 | (edn/read-string {:readers {'deebn.rbm.RBM edn->RBM 324 | 'deebn.rbm.CRBM edn->CRBM}} (slurp filepath))) 325 | -------------------------------------------------------------------------------- /src/deebn/util.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.util 2 | (:refer-clojure :exclude [+ - * /]) 3 | (:require [clojure.core.matrix :as m] 4 | [clojure.core.matrix.operators :refer [+ - * /]]) 5 | (:import mikera.vectorz.Scalar)) 6 | 7 | (defn bernoulli 8 | "Take a single Bernoulli sample, given a probability" 9 | [p] 10 | (if (> (rand) p) 0 1)) 11 | 12 | (defn sigmoid 13 | "Sigmoid function, used as an activation function for nodes in a 14 | network." 
[^double x]
  ;; Logistic sigmoid 1/(1 + e^-x); single-arg / is the reciprocal.
  (/ (inc (Math/exp (- x)))))

(defn query-hidden
  "Given an RBM and an input vector, query the RBM for the state of
  the hidden nodes. With mean-field? true the activation
  probabilities are returned directly; otherwise each unit is
  sampled as a Bernoulli trial on its probability."
  [rbm x mean-field?]
  (let [pre-sample (m/emap sigmoid (+ (:hbias rbm) (m/mmul x (:w rbm))))]
    (if mean-field? pre-sample
        (map bernoulli pre-sample))))

(defn gen-softmax
  "Generate a softmax output. x is the class represented by the
  output, with 0 represented by the first element in the vector."
  [x num-classes]
  (m/mset (m/zero-vector num-classes) x 1.0))

(defn softmax-from-obv
  "Given an observation with label attached, replace the label value
  with an appropriate softmax unit. This assumes that the label is the
  last element in an observation; the softmax unit is *prepended* to
  the remaining features, matching the joint-density RBM layout."
  [x num-classes]
  (let [label (last x)
        obv (butlast x)
        new-label (gen-softmax label num-classes)]
    (vec (concat new-label obv))))

(defn get-min-position
  "Get the position of the minimum element of a collection, or nil
  for an empty collection.

  BUG FIX: the previous version unconditionally called
  (.get ^Scalar %) on every element, which fails whenever the
  collection holds plain boxed numbers rather than
  mikera.vectorz.Scalar instances (NOTE(review): rbm/free-energy
  results appear to be such plain doubles — confirm against the
  vectorz mmul return type). Unwrap Scalars only when present. As
  before, ties resolve to the last minimal index (zipmap overwrite)."
  [x]
  (when (seq x)
    (let [vals (map #(if (instance? Scalar %) (.get ^Scalar %) (double %)) x)
          least (m/emin x)]
      (get (zipmap vals (range (count vals))) least))))

(defn random-subset
  "Return a matrix of n rows randomly selected from a dataset."
  [n dataset]
  (m/matrix (take n (shuffle (m/rows dataset)))))