├── .gitignore ├── LICENSE ├── README.md ├── project.clj └── src └── deebn ├── core.clj ├── cv.clj ├── dbn.clj ├── dnn.clj ├── mnist.clj ├── protocols.clj ├── rbm.clj └── util.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | data 11 | models 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 
31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. 
As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 
103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 
133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. 
DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. 
However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 
215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deebn 2 | 3 | A Clojure library implementing a Deep Belief Network using Restricted 4 | Boltzmann Machines, based on [Geoffrey Hinton's work][work]. This 5 | library is the result of my thesis research into deep learning methods. 6 | 7 | ## "Installation" 8 | `deebn` is available for download or usage through your favorite dependency management tool from 9 | Clojars: 10 | 11 | [![Clojars Project](http://clojars.org/deebn/latest-version.svg)](http://clojars.org/deebn) 12 | 13 | ## Capabilities 14 | 15 | There are a few types of model that you can build and train, either 16 | for classification or as components of other models: 17 | 18 | - Restricted Boltzmann Machine 19 | - can be used as a component of a Deep Belief Network, or as a standalone discriminatory classifier 20 | Hyper-parameters: 21 | - learning rate 22 | - initial momentum 23 | - momentum (used after 'momentum-delay' epochs) 24 | - momentum-delay 25 | - batch-size 26 | - epochs 27 | - gap-delay (epochs to wait before testing for early stopping) 28 | - gap-stop-delay (consecutive positive energy gap epochs that initiate 29 | an early stop) 30 | - Deep Belief Network (composed of layers of RBMs) 31 | - Can be used to pre-train a Deep Neural Network, or as a discriminatory classifier 32 | (Note: a classification DBN is not fine-tuned - performance is satisfactory but not optimal) 33 | Hyper-parameters: 34 | - whether to use activations rather than samples from hidden layers when propagating 35 | to the next layer 36 | - Deep Neural Network 37 | - Initialized from a pre-trained DBN, with an additional logistic regression layer added 38 | - Network output is a softmax unit 39 | - Logistic regression unit is pre-trained with output from the DBN before moving to a 40 | full backprop training regimen 41 | Hyper-parameters: 42 | 
- batch-size 43 | - epochs 44 | - learning rate 45 | - lambda - L2 regularization (weight decay) parameter 46 | 47 | ## Usage 48 | 49 | The `core` namespace aims to offer examples of using the library. The 50 | `mnist` namespace offers examples for bringing in datasets (in this case 51 | the [MNIST][mnist] dataset). 52 | 53 | ## License 54 | 55 | Copyright © 2014 Chris Sims 56 | 57 | Distributed under the Eclipse Public License either version 1.0 or (at 58 | your option) any later version. 59 | 60 | [work]: http://www.cs.toronto.edu/~hinton/absps/montrealTR.pdf 61 | [mnist]: http://yann.lecun.com/exdb/mnist/ 62 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject deebn "0.1.1-SNAPSHOT" 2 | :description "Deep Belief Network using Restricted Boltzmann Machines" 3 | :url "https://jcsi.ms" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.6.0"] 7 | [org.clojure/tools.reader "0.8.15"] 8 | [org.clojure/data.csv "0.1.2"] 9 | [net.mikera/vectorz-clj "0.29.0"] 10 | [net.mikera/core.matrix "0.33.2"] 11 | [net.mikera/core.matrix.stats "0.5.0"] 12 | [com.taoensso/timbre "3.4.0"]] 13 | :profiles {:dev {:dependencies [[org.clojure/test.check "0.7.0"]]}} 14 | :jvm-opts ["-Xmx4g" "-Xms3g"] 15 | :deploy-repositories [["releases" :clojars]]) 16 | -------------------------------------------------------------------------------- /src/deebn/core.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.core 2 | (:require [deebn.dbn :refer [build-dbn build-classify-dbn]] 3 | [deebn.dnn :refer [dbn->dnn]] 4 | [deebn.mnist :refer [load-data-sans-label 5 | load-data-with-softmax load-data]] 6 | [deebn.protocols :refer [train-model test-model classify]] 7 | [deebn.rbm :refer [build-rbm build-jd-rbm]] 8 | 
[clojure.core.matrix :refer [set-current-implementation]])) 9 | 10 | (set-current-implementation :vectorz) 11 | 12 | (comment 13 | ;;; Choose a model and the corresponding dataset 14 | ;; A single Restricted Boltzmann Machine used for classification 15 | (def m (build-jd-rbm 784 500 10)) 16 | (def dataset (load-data-with-softmax "data/mnist_train.csv")) 17 | ;; A classification Deep Belief Network 18 | (def m (build-classify-dbn [784 500 500 2000] 10)) 19 | (def dataset (load-data-with-softmax "data/mnist_train.csv")) 20 | ;; A Deep Neural Network backed by a pre-trained Deep Belief Network 21 | (def m (build-dbn [784 500 500 250])) 22 | (def dataset (load-data-sans-label "data/mnist_train.csv")) 23 | 24 | ;;; Train the model 25 | (def m (train-model m dataset {:batch-size 100})) 26 | ;; For a DNN, the DBN is converted to a DNN before fine-tuning 27 | (def m (dbn->dnn m 10)) 28 | (def dataset (load-data "data/mnist_train.csv")) 29 | (def m (train-model m dataset {:batch-size 100 :epochs 10})) 30 | 31 | ;;; Test the model 32 | (def test-dataset (load-data "data/mnist_test.csv")) 33 | (test-model m test-dataset) 34 | 35 | ;;; Classify a single observation 36 | (def dv (first (load-data-sans-label "data/mnist_test.csv"))) 37 | (classify m dv) 38 | ) 39 | -------------------------------------------------------------------------------- /src/deebn/cv.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.cv 2 | (:require [clojure.core.matrix :as m] 3 | [clojure.core.matrix.select :as s] 4 | [deebn.protocols :refer [train-model test-model]] 5 | [deebn.dnn :refer [dbn->dnn]])) 6 | 7 | (defn select-folds 8 | "Returns a vector of vectors specifying holdout observations to form 9 | k folds in a dataset." 
10 | [dataset k] 11 | (let [rows (vec (range (m/row-count dataset))) 12 | part-size (int (Math/ceil (/ (count rows) k)))] 13 | (vec (partition part-size part-size nil (shuffle rows))))) 14 | 15 | (defn k-fold-cross-validation 16 | "Perform k-fold cross-validation on a training model. A single data 17 | set is provided, and a vector of error percentages is 18 | returned. Since training data and test data are in potentially 19 | different formats, pass both in for use. These should be two formats 20 | of the same data set to use for validation." 21 | [model train-data test-data params k] 22 | (let [holdouts (select-folds train-data k)] 23 | (mapv (fn [holdouts] 24 | (let [train-folds (s/sel train-data (s/exclude holdouts) (s/irange)) 25 | test-fold (s/sel test-data holdouts (s/irange)) 26 | m (train-model model train-folds params)] 27 | (test-model m test-fold))) 28 | holdouts))) 29 | 30 | (defn k-fold-cross-validation-dnn 31 | "Perform k-fold cross-validation on a training model. A single data 32 | set is provided, and a vector of error percentages is 33 | returned. Since training data and test data are in potentially 34 | different formats, pass both in for use. These should be two formats 35 | of the same data set to use for validation. DNNs require two 36 | training steps, and so require a slightly different validation 37 | approach." 
38 | [model train-data test-data params k classes] 39 | (let [holdouts (select-folds train-data k)] 40 | (mapv (fn [holdouts] 41 | (let [train-folds (m/matrix (s/sel train-data 42 | (s/exclude holdouts) (s/irange))) 43 | test-fold (m/matrix (s/sel test-data holdouts (s/irange))) 44 | m (train-model model train-folds params) 45 | m (dbn->dnn m classes) 46 | train-folds (m/matrix (s/sel test-data (s/exclude holdouts) 47 | (s/irange))) 48 | m (train-model m train-folds params)] 49 | (test-model m test-fold))) 50 | holdouts))) 51 | -------------------------------------------------------------------------------- /src/deebn/dbn.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.dbn 2 | (:require [deebn.protocols :as p] 3 | [deebn.rbm :refer [build-rbm build-jd-rbm edn->CRBM edn->RBM]] 4 | [deebn.util :refer [query-hidden]] 5 | [clojure.core.matrix :as m] 6 | [clojure.tools.reader.edn :as edn] 7 | [clojure.core.matrix.select :as s])) 8 | 9 | (defrecord DBN [rbms layers]) 10 | (defrecord CDBN [rbms layers classes]) 11 | 12 | (m/set-current-implementation :vectorz) 13 | 14 | (defn build-dbn 15 | "Build a Deep Belief Network composed of Restricted Boltzmann Machines. 16 | 17 | layers is a vector of nodes in each layer, starting with the visible 18 | layer. 19 | 20 | Ex: [784 500 500 2000] -> 784-500 RBM, a 500-500 RBM, and a 500-2000 21 | RBM" 22 | [layers] 23 | (let [rbms (mapv #(build-rbm %1 %2) (butlast layers) (rest layers))] 24 | (->DBN rbms layers))) 25 | 26 | (defn build-classify-dbn 27 | "Build a Deep Belief Network using Restricted Boltzmann Machines 28 | designed to classify an observation. 29 | 30 | See `build-dbn` for layers usage. classes is the number of possible 31 | classes the observation could be." 
32 | [layers classes] 33 | (let [base (build-dbn (butlast layers)) 34 | associative (build-jd-rbm (last (butlast layers)) (last layers) classes)] 35 | (map->CDBN {:rbms (conj (:rbms base) associative) 36 | :layers layers 37 | :classes classes}))) 38 | 39 | (defn train-dbn 40 | "Train a generative Deep Belief Network on a dataset. This trained 41 | model doesn't have an inherent value, unless the trained weights are 42 | subsequently used to initialize another network, e.g. a simple 43 | feedforward neural network. 44 | 45 | dataset is an unlabeled dataset used for unsupervised training. 46 | 47 | mean-field? is a key in the params map, and is a boolean indicating 48 | whether to use the expected value from a hidden layer as the input 49 | to the next RBM in the network, or use the sampled binary 50 | value. Defaults to true. 51 | 52 | query-final? is a key in the params map, and is used to determine if 53 | the final RBm trained is queried for the state of its hidden 54 | layer. This is only used when training the generative layers of a 55 | classification DBN, and changes the return type (both the trained 56 | DBN and the final transformed dataset are returned if this is true). 57 | 58 | See `train-rbm` for details on hyper-parameters passed in the param map." 59 | [dbn dataset params] 60 | (let [{:keys [mean-field? query-final?] 61 | :or {mean-field? true query-final? false}} params 62 | ;; Train the first RBM 63 | rbms (assoc (:rbms dbn) 0 64 | (p/train-model (first (:rbms dbn)) dataset params))] 65 | (loop [rbms rbms 66 | iter 1 67 | data (query-hidden (first rbms) dataset mean-field?)] 68 | (if (>= iter (count rbms)) 69 | (if query-final? 70 | {:dbn (assoc dbn :rbms rbms) :data data} 71 | (assoc dbn :rbms rbms)) 72 | (let [new-rbm (p/train-model (get rbms iter) data params)] 73 | (recur (assoc rbms iter new-rbm) 74 | (inc iter) 75 | ;; Shortcut to prevent a final, unnecessary calculation 76 | (when (or (< (inc iter) (count rbms)) query-final?) 
77 | (query-hidden new-rbm data mean-field?)))))))) 78 | 79 | (defn train-classify-dbn 80 | "Train a Deep Belief Network designed to classify data vectors. 81 | 82 | dataset is a softmax-labeled dataset, in the same format as that 83 | produced by deebn.mnist/load-data-with-softmax (the softmax precedes 84 | the data vector). 85 | 86 | Check train-rbm and train-dbn for more information about 87 | parameters." 88 | [dbn dataset params] 89 | (let [{:keys [mean-field?] :or {mean-field? true}} params 90 | softmaxes (m/matrix (s/sel dataset (s/irange) 91 | (range 0 (:classes dbn)))) 92 | {gen-dbn :dbn xform-data :data} 93 | (train-dbn 94 | (assoc dbn :rbms 95 | (vec (butlast (:rbms dbn)))) 96 | (m/matrix (s/sel dataset (s/irange) 97 | (range (:classes dbn) (m/column-count dataset)))) 98 | (assoc params :query-final? true))] 99 | (assoc dbn :rbms 100 | (conj (:rbms gen-dbn) 101 | (p/train-model (last (:rbms dbn)) 102 | (m/join-along 1 softmaxes xform-data) 103 | params))))) 104 | 105 | (extend-protocol p/Trainable 106 | DBN 107 | (train-model [m dataset params] 108 | (train-dbn m dataset params))) 109 | 110 | (extend-protocol p/Trainable 111 | CDBN 112 | (train-model [m dataset params] 113 | (train-classify-dbn m dataset params))) 114 | 115 | 116 | ;;;=========================================================================== 117 | ;;; Testing a DBN trained on a data set 118 | ;;;=========================================================================== 119 | 120 | (defn classify-obv 121 | "Given a DBN and a single observation, return the model's prediction." 122 | [dbn obv] 123 | (let [prop-data (reduce #(query-hidden %2 %1 true) 124 | obv 125 | (butlast (:rbms dbn)))] 126 | (p/classify (last (:rbms dbn)) prop-data))) 127 | 128 | (extend-protocol p/Classify 129 | CDBN 130 | (classify [m obv] 131 | (classify-obv m obv))) 132 | 133 | 134 | (defn test-dbn 135 | "Test a classification Deep Belief Network on a given dataset. 
136 | 137 | The dataset should have the label as the last entry in each 138 | observation." 139 | [dbn dataset] 140 | (let [columns (m/column-count dataset) 141 | labels (m/matrix (mapv vector (s/sel dataset (s/irange) (s/end dataset 1)))) 142 | ;; Propagate the dataset up through the lower layers of the DBN 143 | prop-data (reduce #(query-hidden %2 %1 true) 144 | (m/matrix (s/sel dataset 145 | (s/irange) 146 | (range 0 (dec columns)))) 147 | (butlast (:rbms dbn)))] 148 | (p/test-model (last (:rbms dbn)) (m/join-along 1 prop-data labels)))) 149 | 150 | (extend-protocol p/Testable 151 | CDBN 152 | (test-model [m dataset] 153 | (test-dbn m dataset))) 154 | 155 | 156 | ;;;=========================================================================== 157 | ;;; Utility functions for a DBN 158 | ;;;=========================================================================== 159 | 160 | (defn save-dbn 161 | "Save a DBN." 162 | [dbn filepath] 163 | (spit filepath (pr-str dbn))) 164 | 165 | (defn load-dbn 166 | "Load a DBN from disk." 
167 | [filepath] 168 | (edn/read-string {:readers {'deebn.rbm.RBM edn->RBM 169 | 'deebn.rbm.CRBM edn->CRBM 170 | 'deebn.dbn.DBN map->DBN 171 | 'deebn.dbn.CDBN map->CDBN}} (slurp filepath))) 172 | -------------------------------------------------------------------------------- /src/deebn/dnn.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.dnn 2 | (:refer-clojure :exclude [+ * -]) 3 | (:require [deebn.protocols :as p] 4 | [deebn.util :refer [sigmoid gen-softmax]] 5 | [clojure.core.matrix :as m] 6 | [clojure.core.matrix.operators :refer [+ * -]] 7 | [clojure.core.matrix.random :as rand] 8 | [clojure.core.matrix.select :as s] 9 | [clojure.tools.reader.edn :as edn]) 10 | (:import java.io.Writer)) 11 | 12 | (m/set-current-implementation :vectorz) 13 | 14 | (defrecord DNN [weights biases layers classes]) 15 | 16 | (defn dbn->dnn 17 | "Given a pretrained Deep Belief Network, use the trained weights and 18 | biases to build a Deep Neural Network." 19 | [dbn classes] 20 | (let [top-layer {:w (m/matrix (repeatedly 21 | (last (:layers dbn)) 22 | #(rand/sample-normal classes))) 23 | :bias (m/zero-vector classes)}] 24 | (map->DNN {:classes classes 25 | :weights (conj (mapv :w (:rbms dbn)) (:w top-layer)) 26 | :biases (conj (mapv :hbias (:rbms dbn)) (:bias top-layer)) 27 | :layers (:layers dbn)}))) 28 | 29 | (defn prop-up 30 | "Given an input matrix, weight matrix, and bias vector, propagate 31 | the signal through the layer." 32 | [input weights bias] 33 | (m/emap sigmoid (+ bias (m/mmul input weights)))) 34 | 35 | (defn feed-forward 36 | "Given an initial input batch and a DNN, feed the batch through the 37 | net, retaining the output of each layer." 38 | [batch dnn] 39 | (reductions #(prop-up %1 (first %2) (second %2)) 40 | batch 41 | (map #(vector %1 %2) (:weights dnn) (:biases dnn)))) 42 | 43 | (defn net-output 44 | "Propagate an input matrix through the network." 
45 | [net input] 46 | (m/matrix (reduce #(prop-up %1 (first %2) (second %2)) 47 | input 48 | (mapv #(vector %1 %2) (:weights net) (:biases net))))) 49 | 50 | (defn layer-error 51 | "Calculate the error for a particular layer in a net, given the 52 | weights for the next layer, the error for the next layer, and the 53 | output for the current layer." 54 | [weights next-error output] 55 | (* (m/mmul next-error (m/transpose weights)) (* output (- 1 output)))) 56 | 57 | (defn update-layer 58 | "Update the weights and biases of a layer, given the previous 59 | weights and biases, input coming into the weights, the error for the 60 | layer, the learning rate, and the batch size." 61 | [weights biases input error learning-rate lambda batch-size observations] 62 | (let [weights (- (* weights (- 1 (/ (* learning-rate lambda) observations))) 63 | (* (/ learning-rate batch-size) 64 | (reduce + (mapv m/outer-product 65 | (m/rows input) 66 | (m/rows error))))) 67 | biases (- biases (* (/ learning-rate batch-size) 68 | (reduce + (m/rows error))))] 69 | [weights biases])) 70 | 71 | (defn train-batch 72 | "Given a batch of training data and a DNN, update the weights and 73 | biases accordingly." 
74 | [batch dnn observations learning-rate lambda] 75 | (let [data (m/matrix (s/sel batch 76 | (s/irange) 77 | (range 0 (dec (m/column-count batch))))) 78 | targets (m/matrix (mapv #(gen-softmax %1 (:classes dnn)) 79 | (s/sel batch (s/irange) s/end))) 80 | data (feed-forward data dnn) 81 | errors (m/emap #(- %1 %2) 82 | (last data) targets) 83 | errors (reverse (reductions #(layer-error (first %2) %1 (second %2)) 84 | errors 85 | (map #(vector %1 %2) 86 | (reverse (rest (:weights dnn))) 87 | (reverse (butlast (rest data)))))) 88 | updated (mapv #(update-layer %1 %2 %3 %4 89 | learning-rate 90 | lambda 91 | (m/row-count batch) 92 | observations) 93 | (:weights dnn) 94 | (:biases dnn) 95 | (butlast data) 96 | errors)] 97 | (assoc dnn :weights (mapv first updated) :biases (mapv second updated)))) 98 | 99 | (defn train-epoch 100 | "Given a training dataset and a net, train it for one epoch (one 101 | pass over the dataset)." 102 | [net dataset observations learning-rate lambda batch-size] 103 | (loop [net net 104 | batch (m/matrix (s/sel dataset (range 0 batch-size) (s/irange))) 105 | batch-num 0] 106 | (let [start (* batch-num batch-size) 107 | end (min (* (inc batch-num) batch-size) (m/row-count dataset))] 108 | (if (>= start (m/row-count dataset)) 109 | net 110 | (do 111 | (recur (train-batch batch net observations learning-rate lambda) 112 | (m/matrix (s/sel dataset (range start end) (s/irange))) 113 | (inc batch-num))))))) 114 | 115 | (defn train-top-layer 116 | "Pre-train the top logistic regression layer before moving to fine-tuning." 
117 | [dnn dataset observations batch-size epochs learning-rate lambda] 118 | (println "Propagating dataset through DNN...") 119 | (let [top-layer {:weights (vector (last (:weights dnn))) 120 | :biases (vector (last (:biases dnn))) 121 | :classes (:classes dnn)} 122 | output (net-output 123 | (assoc dnn 124 | :weights (butlast (:weights dnn)) 125 | :biases (butlast (:biases dnn))) 126 | (m/matrix 127 | (s/sel dataset 128 | (s/irange) 129 | (range 0 (dec (m/column-count dataset)))))) 130 | targets (m/reshape (m/matrix (s/sel dataset (s/irange) s/end)) 131 | [(m/row-count dataset) 1]) 132 | dataset (m/matrix (m/join-along 1 output targets))] 133 | (println "Pre-training logistic regression layer, epoch 1") 134 | (loop [epoch 2 135 | top-layer (train-epoch top-layer dataset observations 136 | learning-rate lambda batch-size)] 137 | (if (> epoch epochs) 138 | (assoc dnn 139 | :weights (assoc (:weights dnn) (dec (count (:weights dnn))) 140 | (first (:weights top-layer))) 141 | :biases (assoc (:biases dnn) (dec (count (:biases dnn))) 142 | (first (:biases top-layer)))) 143 | (do 144 | (println "Pre-training logistic regression layer, epoch" epoch) 145 | (recur (inc epoch) 146 | (train-epoch top-layer dataset observations 147 | learning-rate lambda batch-size))))))) 148 | 149 | (defn train-dnn 150 | "Given a labeled dataset, train a DNN. 151 | 152 | The dataset should have the label as the last element of each input 153 | vector. 
154 | 155 | params is a map that may have the following keys: 156 | batch-size: default 100 157 | epochs: default 100 158 | learning-rate: default 0.5 159 | lambda: default 0.1 " 160 | [dnn dataset params] 161 | (let [{:keys [batch-size epochs learning-rate lambda] 162 | :or {batch-size 100 163 | epochs 100 164 | learning-rate 0.5 165 | lambda 0.1}} params 166 | observations (m/row-count dataset) 167 | net (train-top-layer dnn dataset observations batch-size 168 | epochs learning-rate lambda)] 169 | (println "Training epoch 1") 170 | (loop [epoch 2 171 | net (train-epoch net dataset observations 172 | learning-rate lambda batch-size)] 173 | (if (> epoch epochs) 174 | net 175 | (do 176 | (println "\nTraining epoch" epoch) 177 | (recur (inc epoch) 178 | (train-epoch net dataset observations learning-rate 179 | lambda batch-size))))))) 180 | 181 | (extend-protocol p/Trainable 182 | DNN 183 | (train-model [m dataset params] 184 | (train-dnn m dataset params))) 185 | 186 | 187 | ;;;=========================================================================== 188 | ;;; Testing a DNN trained on a data set 189 | ;;;=========================================================================== 190 | 191 | (defn softmax->class 192 | "Get the predicted class from a softmax output." 193 | [x] 194 | (let [largest (m/emax x) 195 | indexed (zipmap x (range (m/row-count x)))] 196 | (get indexed largest))) 197 | 198 | (defn classify-obv 199 | "Given a DNN and a single observation, return the model's prediction." 200 | [dnn obv] 201 | (softmax->class (net-output dnn obv))) 202 | 203 | (extend-protocol p/Classify 204 | DNN 205 | (classify [m obv] 206 | (classify-obv m obv))) 207 | 208 | (defn test-dnn 209 | "Test a Deep Neural Network on a dataset. Returns an error percentage. 210 | 211 | dataset should have the label as the last entry in each observation." 
212 | [dnn dataset] 213 | (let [num-observations (m/row-count dataset) 214 | predictions (mapv #(softmax->class (net-output dnn %1)) 215 | (m/matrix (s/sel 216 | dataset 217 | (s/irange) 218 | (range 0 (dec (m/column-count dataset)))))) 219 | errors (mapv #(if (== (last %1) %2) 0 1) dataset predictions)] 220 | (double (/ (m/esum errors) num-observations)))) 221 | 222 | (extend-protocol p/Testable 223 | DNN 224 | (test-model [m dataset] 225 | (test-dnn m dataset))) 226 | 227 | ;;;=========================================================================== 228 | ;;; Utility functions for a DNN 229 | ;;;=========================================================================== 230 | 231 | (defmethod clojure.core/print-method DNN print-DNN [dnn ^Writer w] 232 | (.write w (str "#deebn.dnn.DNN {" 233 | " :weights " (mapv m/to-nested-vectors (:weights dnn)) 234 | " :biases " (mapv m/to-nested-vectors (:biases dnn)) 235 | " :layers " (:layers dnn) 236 | " :classes " (:classes dnn) 237 | " }"))) 238 | 239 | (defn save-dnn 240 | "Save a DNN to disk." 241 | [dnn filepath] 242 | (spit filepath (pr-str dnn))) 243 | 244 | (defn edn->DNN 245 | "The default map->DNN function provided by the defrecord doesn't 246 | provide us with the performant implementation (i.e. matrices and 247 | arrays from core.matrix), so this function adds a small step to 248 | ensure that." 249 | [data] 250 | (->DNN (mapv m/matrix (:weights data)) 251 | (mapv m/matrix (:biases data)) 252 | (:layers data) 253 | (:classes data))) 254 | 255 | (defn load-dnn 256 | "Load a DNN from disk." 
[filepath]
  (edn/read-string {:readers {'deebn.dnn.DNN edn->DNN}} (slurp filepath)))

--------------------------------------------------------------------------------
/src/deebn/mnist.clj:
--------------------------------------------------------------------------------
(ns deebn.mnist
  (:require [clojure.core.matrix :as matrix]
            [clojure.core.matrix.select :as select]
            [clojure.data.csv :as csv]
            [clojure.edn :as edn]
            [clojure.java.io :as io]
            [deebn.util :refer [softmax-from-obv]]))

(matrix/set-current-implementation :vectorz)

(defn scale-data
  "Scale the input parameters to [0-1]. This assumes that the label is
  the first element. After scaling, the label is the last element."
  [x]
  (conj (mapv #(/ % 255.0) (rest x)) (first x)))

(defn load-data
  "Load a MNIST CSV data set.

  SECURITY FIX: fields were previously parsed with
  clojure.core/read-string, which honors reader macros such as #= and
  can therefore execute arbitrary code embedded in an untrusted CSV
  file. clojure.edn/read-string parses the same numeric literals
  without any evaluation."
  [filepath]
  (let [data (with-open [in-file (io/reader filepath)]
               (->> in-file
                    (csv/read-csv)
                    (matrix/emap edn/read-string)
                    (mapv scale-data)))]
    (matrix/matrix data)))

(defn load-data-sans-label
  "Load a MNIST CSV data set without the label.

  Generalized: the feature count is derived from the data (label is
  the last column after scale-data) instead of the hard-coded 784,
  which is what this computed for MNIST anyway."
  [filepath]
  (let [data (load-data filepath)]
    (matrix/matrix (select/sel data (select/irange)
                               (range 0 (dec (matrix/column-count data)))))))

(defn load-data-with-softmax
  "Load a dataset with the class label expanded to a softmax-appropriate form.
  Example: In the MNIST dataset, class '7' expands to -> '0 0 0 0 0 0 0 1 0 0"
  [filepath]
  (let [data (load-data filepath)
        data (mapv #(softmax-from-obv % 10) (matrix/rows data))]
    (matrix/matrix data)))
--------------------------------------------------------------------------------
/src/deebn/protocols.clj:
--------------------------------------------------------------------------------
(ns deebn.protocols)

(defprotocol Trainable
  "Protocol for models that are trainable with a dataset."
5 | (train-model [m dataset params] 6 | "Train a model, given a dataset and relevant 7 | hyper-parameters. Refer to individual training functions for 8 | hyper-parameter details.")) 9 | 10 | (defprotocol Testable 11 | "Protocol for models that are testable." 12 | (test-model [m dataset] 13 | "Test a trained model given a dataset. The dataset may need to be 14 | in a different format from that used to train.")) 15 | 16 | (defprotocol Classify 17 | "Protocol for models that can classify a given observation." 18 | (classify [m obv] 19 | "Use the given model to classify a single observation.")) 20 | -------------------------------------------------------------------------------- /src/deebn/rbm.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.rbm 2 | (:refer-clojure :exclude [+ - * / ==]) 3 | (:require [deebn.protocols :refer [Testable Trainable Classify]] 4 | [deebn.util :refer [bernoulli gen-softmax 5 | get-min-position sigmoid]] 6 | [clojure.core.matrix :as m] 7 | [clojure.core.matrix.operators :refer [+ - * / ==]] 8 | [clojure.core.matrix.select :as s] 9 | [clojure.core.matrix.random :as rand] 10 | [clojure.core.matrix.stats :as stats] 11 | [clojure.set :refer [difference]] 12 | [clojure.tools.reader.edn :as edn]) 13 | (:import java.io.Writer)) 14 | 15 | (m/set-current-implementation :vectorz) 16 | 17 | ;;;=========================================================================== 18 | ;;; Generate Restricted Boltzmann Machines 19 | ;;; ========================================================================== 20 | ;; We define a purely generative RBM, trained without any class 21 | ;; labels, and a classification RBM 22 | (defrecord RBM [w vbias hbias w-vel vbias-vel hbias-vel visible hidden]) 23 | (defrecord CRBM [w vbias hbias w-vel vbias-vel hbias-vel visible hidden classes]) 24 | 25 | (defn build-rbm 26 | "Factory function to produce an RBM record." 
27 | [visible hidden] 28 | (let [w (m/matrix (repeatedly visible #(/ (rand/sample-normal hidden) 100))) 29 | w-vel (m/zero-matrix visible hidden) 30 | ;; TODO: The visual biases should really be set to 31 | ;; log(p_i/ (1 - p_i)), where p_i is the proportion of 32 | ;; training vectors in which unit i is turned on. 33 | vbias (m/zero-vector visible) 34 | hbias (m/array (repeat hidden -4)) 35 | vbias-vel (m/zero-vector visible) 36 | hbias-vel (m/zero-vector hidden)] 37 | (->RBM w vbias hbias w-vel vbias-vel hbias-vel visible hidden))) 38 | 39 | (defn build-jd-rbm 40 | "Factory function to build a joint density RBM for testing purposes. 41 | 42 | This RBM has two sets of visible units - the typical set 43 | representing each observation in the data set, and a softmax unit 44 | representing the label for each observation. These are combined, and 45 | the label becaomes part of the input vector." 46 | [visible hidden classes] 47 | (let [rbm (build-rbm (+ visible classes) hidden)] 48 | (map->CRBM (assoc rbm :classes classes)))) 49 | 50 | 51 | ;;;=========================================================================== 52 | ;;; Train an RBM 53 | ;;; ========================================================================== 54 | 55 | 56 | (defn update-weights 57 | "Determine the weight gradient from this batch" 58 | [ph ph2 batch pv] 59 | (reduce + (map #(- (m/outer-product %1 %2) (m/outer-product %3 %4)) 60 | (m/rows batch) (m/rows ph) 61 | (m/rows pv) (m/rows ph2)))) 62 | 63 | ;; TODO: Implement CD-K - currently CD-1 is hard-coded. 
(defn update-rbm
  "Single batch (CD-1) step update of RBM parameters: positive phase
  probabilities ph, one Gibbs step to reconstruction v, negative
  phase probabilities ph2, then momentum-smoothed gradient ascent on
  weights and both bias vectors."
  [batch rbm learning-rate momentum]
  (let [batch-size (m/row-count batch)
        ph (m/emap sigmoid (+ (:hbias rbm) (m/mmul batch (:w rbm))))
        h (m/emap bernoulli ph)
        pv (m/emap sigmoid (+ (:vbias rbm)
                              (m/mmul h (m/transpose (:w rbm)))))
        v (m/emap bernoulli pv)
        ph2 (m/emap sigmoid (+ (:hbias rbm) (m/mmul v (:w rbm))))
        delta-w (/ (update-weights ph ph2 batch pv) batch-size)
        delta-vbias (/ (reduce + (map #(- % %2)
                                      (m/rows batch)
                                      (m/rows pv)))
                       batch-size)
        delta-hbias (/ (reduce + (map #(- % %2)
                                      (m/rows h)
                                      (m/rows ph2)))
                       batch-size)
        w-vel (+ (* momentum (:w-vel rbm)) (* learning-rate delta-w))
        vbias-vel (+ (* momentum (:vbias-vel rbm))
                     (* learning-rate delta-vbias))
        hbias-vel (+ (* momentum (:hbias-vel rbm))
                     (* learning-rate delta-hbias))]
    (assoc rbm
           :w (+ (:w rbm) w-vel)
           :vbias (+ (:vbias rbm) vbias-vel)
           :hbias (+ (:hbias rbm) hbias-vel)
           :w-vel w-vel :vbias-vel vbias-vel :hbias-vel hbias-vel)))

(defn train-epoch
  "Train a single epoch.

  BUG FIX: the previous loop recurred with the slice computed from
  the *current* batch number while training the slice bound on the
  previous iteration, so the first mini-batch was trained twice and
  the final mini-batch was never trained; the initial slice also
  overran datasets smaller than one batch. Each iteration now
  selects and trains its own slice."
  [rbm dataset learning-rate momentum batch-size]
  (let [num-rows (m/row-count dataset)]
    (loop [rbm rbm
           batch-num 1]
      (let [start (* (dec batch-num) batch-size)
            end (min (* batch-num batch-size) num-rows)]
        (if (>= start num-rows)
          rbm
          (do
            (print ".")
            (flush)
            (recur (update-rbm (m/matrix (s/sel dataset (range start end)
                                                (s/irange)))
                               rbm learning-rate momentum)
                   (inc batch-num))))))))

(defn select-overfitting-sets
  "Given a dataset, attempt to choose reasonable validation and test
  sets to monitor overfitting."
114 | [dataset] 115 | (let [obvs (m/row-count dataset) 116 | validation-indices (set (repeatedly (/ obvs 100) #(rand-int obvs))) 117 | validations (m/matrix (s/sel dataset 118 | (vec validation-indices) (s/irange))) 119 | train-indices (difference 120 | (set (repeatedly (/ obvs 100) 121 | #(rand-int obvs))) validation-indices) 122 | train-sample (m/matrix (s/sel dataset (vec train-indices) (s/irange)))] 123 | {:validations validations 124 | :train-sample train-sample 125 | :dataset (s/sel dataset (s/exclude (vec validation-indices)) 126 | (s/irange))})) 127 | 128 | (defn free-energy 129 | "Compute the free energy of a given visible vector and RBM. Lower is 130 | better." 131 | [x rbm] 132 | (let [hidden-input (+ (:hbias rbm) (m/mmul x (:w rbm)))] 133 | (- (- (m/mmul x (:vbias rbm))) 134 | (reduce + (mapv #(Math/log (+ 1 (Math/exp %))) hidden-input))))) 135 | 136 | (defn check-overfitting 137 | "Given an rbm, a sample from the training set, and a validation set, 138 | determine if the model is starting to overfit the data. This is 139 | measured by a difference in the average free energy over the 140 | training set sample and the validation set." 141 | [rbm train-sample validations] 142 | (let [avg-train-energy (stats/mean (pmap #(free-energy %1 rbm) 143 | (m/rows train-sample))) 144 | avg-validation-energy (stats/mean (pmap #(free-energy %1 rbm) 145 | (m/rows validations)))] 146 | (Math/abs ^Double (- avg-train-energy avg-validation-energy)))) 147 | 148 | (defn train-rbm 149 | "Given a training set, train an RBM 150 | 151 | params is a map with various options: 152 | learning-rate: defaults to 0.1 153 | initial-momentum: starting momentum. Defaults to 0.5 154 | momentum: momentum after `momentum-delay` epochs have passed. Defaults to 0.9 155 | momentum-delay: epochs after which `momentum` is used instead of 156 | `initial-momentum`. Defaults to 3 157 | batch-size: size of each mini-batch. 
Defaults to 10 158 | epochs: number of times to train the model over the entire training set. 159 | Defaults to 100 160 | gap-delay: number of epochs elapsed before early stopping is considered 161 | gap-stop-delay: number of sequential epochs where energy gap is increasing 162 | before stopping" 163 | [rbm dataset params] 164 | (let [{:keys [validations train-sample dataset]} 165 | (select-overfitting-sets dataset) 166 | {:keys [learning-rate initial-momentum momentum momentum-delay 167 | batch-size epochs gap-delay gap-stop-delay] 168 | :or {learning-rate 0.1 169 | initial-momentum 0.5 170 | momentum 0.9 171 | momentum-delay 3 172 | batch-size 10 173 | epochs 100 174 | gap-delay 10 175 | gap-stop-delay 2}} params] 176 | (println "Training epoch 1") 177 | (loop [rbm (train-epoch rbm dataset learning-rate 178 | initial-momentum batch-size) 179 | epoch 2 180 | energy-gap (check-overfitting rbm train-sample validations) 181 | gap-inc-count 0] 182 | (if (> epoch epochs) 183 | rbm 184 | (do (println "\nTraining epoch" epoch) 185 | (let [curr-momentum (if (> epoch momentum-delay) 186 | momentum initial-momentum) 187 | rbm (train-epoch rbm dataset learning-rate 188 | curr-momentum batch-size) 189 | gap-after-train (check-overfitting rbm train-sample 190 | validations) 191 | _ (println "\nGap pre-train:" energy-gap 192 | "After train:" gap-after-train)] 193 | (if (and (>= epoch gap-delay) 194 | (neg? (- energy-gap gap-after-train)) 195 | (>= gap-inc-count gap-stop-delay)) 196 | rbm 197 | (recur rbm 198 | (inc epoch) 199 | gap-after-train 200 | (if (neg? 
(- energy-gap gap-after-train)) 201 | (inc gap-inc-count) 202 | 0))))))))) 203 | 204 | (extend-protocol Trainable 205 | CRBM 206 | (train-model [m dataset params] 207 | (train-rbm m dataset params)) 208 | RBM 209 | (train-model [m dataset params] 210 | (train-rbm m dataset params))) 211 | 212 | 213 | ;;;=========================================================================== 214 | ;;; Testing an RBM trained on a data set 215 | ;;;=========================================================================== 216 | 217 | (defn get-prediction 218 | "For a given observation and RBM, return the predicted class." 219 | [x rbm num-classes labeled?] 220 | (let [softmax-cases (mapv #(gen-softmax % num-classes) (range num-classes)) 221 | trials (m/matrix (mapv #(m/join % %2) softmax-cases 222 | (if labeled? 223 | (repeat (butlast x)) 224 | (repeat x)))) 225 | results (mapv #(free-energy % rbm) trials)] 226 | (get-min-position results))) 227 | 228 | (extend-protocol Classify 229 | CRBM 230 | (classify [m obv] 231 | (get-prediction obv m (:classes m) false))) 232 | 233 | (defn test-rbm 234 | "Test a joint density RBM trained on a data set. Returns an error 235 | percentage. 236 | 237 | dataset should have the label as the last entry in each 238 | observation." 
239 | [rbm dataset num-classes] 240 | (let [num-observations (m/row-count dataset) 241 | predictions (pmap #(get-prediction % rbm num-classes true) dataset) 242 | errors (mapv #(if (== (last %) %2) 0 1) dataset predictions) 243 | total (m/esum errors)] 244 | (double (/ total num-observations)))) 245 | 246 | (extend-protocol Testable 247 | CRBM 248 | (test-model [m dataset] 249 | (test-rbm m dataset (:classes m)))) 250 | 251 | 252 | ;;;=========================================================================== 253 | ;;; Utility functions for an RBM 254 | ;;;=========================================================================== 255 | 256 | ;; This is designed for EDN printing, not actually visualizing the RBM 257 | ;; at the REPL (this is only needed because similar methods are not 258 | ;; defined for clojure.core.matrix implementations) 259 | (defmethod clojure.core/print-method RBM print-RBM [rbm ^Writer w] 260 | (.write w (str "#deebn.rbm.RBM {" 261 | " :w " (m/to-nested-vectors (:w rbm)) 262 | " :vbias " (m/to-nested-vectors (:vbias rbm)) 263 | " :hbias " (m/to-nested-vectors (:hbias rbm)) 264 | " :w-vel " (m/to-nested-vectors (:w-vel rbm)) 265 | " :vbias-vel " (m/to-nested-vectors (:vbias-vel rbm)) 266 | " :hbias-vel " (m/to-nested-vectors (:hbias-vel rbm)) 267 | " :visible " (:visible rbm) 268 | " :hidden " (:hidden rbm) 269 | " }"))) 270 | 271 | (defmethod clojure.core/print-method CRBM print-CRBM [rbm ^Writer w] 272 | (.write w (str "#deebn.rbm.CRBM {" 273 | " :w " (m/to-nested-vectors (:w rbm)) 274 | " :vbias " (m/to-nested-vectors (:vbias rbm)) 275 | " :hbias " (m/to-nested-vectors (:hbias rbm)) 276 | " :w-vel " (m/to-nested-vectors (:w-vel rbm)) 277 | " :vbias-vel " (m/to-nested-vectors (:vbias-vel rbm)) 278 | " :hbias-vel " (m/to-nested-vectors (:hbias-vel rbm)) 279 | " :visible " (:visible rbm) 280 | " :hidden " (:hidden rbm) 281 | " :classes " (:classes rbm) 282 | " }"))) 283 | 284 | (defn save-rbm 285 | "Save a RBM to disk." 
286 | [rbm filepath] 287 | (spit filepath (pr-str rbm))) 288 | 289 | (defn edn->RBM 290 | "The default map->RBM function provided by the defrecord doesn't 291 | provide us with the performant implementation (i.e. matrices and 292 | arrays from core.matrix), so this function adds a small step to 293 | ensure that." 294 | [data] 295 | (->RBM (m/matrix (:w data)) 296 | (m/matrix (:vbias data)) 297 | (m/matrix (:hbias data)) 298 | (m/matrix (:w-vel data)) 299 | (m/matrix (:vbias-vel data)) 300 | (m/matrix (:hbias-vel data)) 301 | (:visible data) 302 | (:hidden data))) 303 | 304 | (defn edn->CRBM 305 | "The default map->RBM function provided by the defrecord doesn't 306 | provide us with the performant implementation (i.e. matrices and 307 | arrays from core.matrix), so this function adds a small step to 308 | ensure that." 309 | [data] 310 | (->CRBM (m/matrix (:w data)) 311 | (m/matrix (:vbias data)) 312 | (m/matrix (:hbias data)) 313 | (m/matrix (:w-vel data)) 314 | (m/matrix (:vbias-vel data)) 315 | (m/matrix (:hbias-vel data)) 316 | (:visible data) 317 | (:hidden data) 318 | (:classes data))) 319 | 320 | (defn load-rbm 321 | "Load a RBM from disk." 322 | [filepath] 323 | (edn/read-string {:readers {'deebn.rbm.RBM edn->RBM 324 | 'deebn.rbm.CRBM edn->CRBM}} (slurp filepath))) 325 | -------------------------------------------------------------------------------- /src/deebn/util.clj: -------------------------------------------------------------------------------- 1 | (ns deebn.util 2 | (:refer-clojure :exclude [+ - * /]) 3 | (:require [clojure.core.matrix :as m] 4 | [clojure.core.matrix.operators :refer [+ - * /]]) 5 | (:import mikera.vectorz.Scalar)) 6 | 7 | (defn bernoulli 8 | "Take a single Bernoulli sample, given a probability" 9 | [p] 10 | (if (> (rand) p) 0 1)) 11 | 12 | (defn sigmoid 13 | "Sigmoid function, used as an activation function for nodes in a 14 | network." 
[^double x]
  ;; Logistic sigmoid 1/(1 + e^-x); single-arg / is the reciprocal.
  (/ (inc (Math/exp (- x)))))

(defn query-hidden
  "Given an RBM and an input vector, query the RBM for the state of
  the hidden nodes. With mean-field? true the activation
  probabilities are returned directly; otherwise each unit is
  sampled as a Bernoulli trial on its probability."
  [rbm x mean-field?]
  (let [pre-sample (m/emap sigmoid (+ (:hbias rbm) (m/mmul x (:w rbm))))]
    (if mean-field? pre-sample
        (map bernoulli pre-sample))))

(defn gen-softmax
  "Generate a softmax output. x is the class represented by the
  output, with 0 represented by the first element in the vector."
  [x num-classes]
  (m/mset (m/zero-vector num-classes) x 1.0))

(defn softmax-from-obv
  "Given an observation with label attached, replace the label value
  with an appropriate softmax unit. This assumes that the label is the
  last element in an observation; the softmax unit is *prepended* to
  the remaining features, matching the joint-density RBM layout."
  [x num-classes]
  (let [label (last x)
        obv (butlast x)
        new-label (gen-softmax label num-classes)]
    (vec (concat new-label obv))))

(defn get-min-position
  "Get the position of the minimum element of a collection, or nil
  for an empty collection.

  BUG FIX: the previous version unconditionally called
  (.get ^Scalar %) on every element, which fails whenever the
  collection holds plain boxed numbers rather than
  mikera.vectorz.Scalar instances (NOTE(review): rbm/free-energy
  results appear to be such plain doubles — confirm against the
  vectorz mmul return type). Unwrap Scalars only when present. As
  before, ties resolve to the last minimal index (zipmap overwrite)."
  [x]
  (when (seq x)
    (let [vals (map #(if (instance? Scalar %) (.get ^Scalar %) (double %)) x)
          least (m/emin x)]
      (get (zipmap vals (range (count vals))) least))))

(defn random-subset
  "Return a matrix of n rows randomly selected from a dataset."
  [n dataset]
  (m/matrix (take n (shuffle (m/rows dataset)))))