├── .gitignore ├── LICENSE ├── README.md ├── build.sbt ├── project ├── build.properties └── plugins.sbt └── src └── main └── scala └── kr └── ac └── kaist └── ir └── deep ├── field └── package.scala ├── fn ├── Activation.scala ├── Objective.scala ├── ScalarMatrix.scala ├── WeightUpdater.scala └── package.scala ├── layer ├── BasicLayer.scala ├── Dropout.scala ├── DropoutOperation.scala ├── FullTensorLayer.scala ├── GaussianRBFLayer.scala ├── LowerTriangularLayer.scala ├── Normalize.scala ├── NormalizeOperation.scala ├── Rank3TensorLayer.scala ├── ReconBasicLayer.scala ├── Reconstructable.scala ├── SplitTensorLayer.scala └── package.scala ├── network ├── AutoEncoder.scala ├── BasicNetwork.scala ├── StackedAutoEncoder.scala └── package.scala ├── package.scala ├── rec ├── BinaryTree.scala ├── Leaf.scala ├── Node.scala ├── WildcardLeaf.scala └── package.scala ├── train ├── AEType.scala ├── DistBeliefTrainStyle.scala ├── ManipulationType.scala ├── MultiThreadTrainStyle.scala ├── RAEType.scala ├── RandomEqualPartitioner.scala ├── SingleThreadTrainStyle.scala ├── StandardRAEType.scala ├── TrainStyle.scala ├── Trainer.scala ├── TrainingCriteria.scala ├── TreeType.scala ├── URAEType.scala ├── VectorType.scala └── package.scala └── wordvec ├── PrepareCorpus.scala ├── StringToVectorType.scala ├── StringType.scala └── package.scala /.gitignore: -------------------------------------------------------------------------------- 1 | **/.idea/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ScalaNetwork 1.3.0 2 | ==================== 3 | 4 | ## Currently, we don't maintain this repository any more. The code was not designed well, so we re-designed the entire logic. For the latest library, check [DeepSpark](https://github.com/nearbydelta/deepspark). 5 | 6 | A *Neural Network implementation* with Scala, [Breeze](https://github.com/scalanlp/breeze) & [Spark](http://spark.apache.org) 7 | 8 | ScalaNetwork follows the [GPL v2 license](http://choosealicense.com/licenses/gpl-2.0/). 9 | 10 | # Features 11 | 12 | ## Network 13 | 14 | ScalaNetwork supports the following layered neural network implementations: 15 | 16 | * *Fully-connected* Neural Network : f(Wx + b) 17 | * *Fully-connected* Rank-3 Tensor Network : f(v1^T Q^[1:k] v2 + L^[1:k] v + b) 18 | * *Fully-connected* Auto Encoder 19 | * *Fully-connected* Stacked Auto Encoder 20 | 21 | You can also implement the following recursive networks via the training tools. 22 | 23 | * Traditional *Recursive* Auto Encoder (RAE) 24 | * Standard *Recursive* Auto Encoder (RAE) 25 | * Unfolding *Recursive* Auto Encoder (RAE) [EXPERIMENTAL] 26 | 27 | ## Training Methodology 28 | 29 | ScalaNetwork supports the following training methodologies: 30 | 31 | * Stochastic Gradient Descent w/ L1-, L2-regularization, Momentum. 32 | * [AdaGrad](http://www.magicbroom.info/Papers/DuchiHaSi10.pdf) 33 | * [AdaDelta](http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf) 34 | 35 | ScalaNetwork supports the following environments: 36 | 37 | * Single-Threaded Training Environment. 38 | * Spark-based Distributed Environment, with a modified version of Downpour SGD from [DistBelief](http://research.google.com/archive/large_deep_networks_nips2012.html) 39 | 40 | You can also add negative examples with `Trainer.setNegativeSampler()`.
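The distributed environment above is wired together from the classes documented in the Training section of this README. The following is a minimal, illustrative sketch only: the class and parameter names are taken from the "Training Style" and "Training Criteria" sections below, while the imports and the `SparkContext` configuration are assumptions inferred from the source tree and build file, not verified API.

```scala
import kr.ac.kaist.ir.deep.fn._
import kr.ac.kaist.ir.deep.network._
import kr.ac.kaist.ir.deep.train._
import org.apache.spark.storage.StorageLevel
import org.apache.spark.{SparkConf, SparkContext}
import scala.concurrent.duration._

// Spark configuration is illustrative; set the master / executors as usual.
val sc = new SparkContext(new SparkConf().setAppName("ScalaNetwork-DistBelief"))
// A small fully-connected network, as in the Simple Example below.
val net = Network(Sigmoid, 2, 4, 1)
// DistBeliefTrainStyle(network, spark context, algorithm, manipulation type, criteria)
val style = new DistBeliefTrainStyle(
  net,
  sc,
  new AdaGrad(l2decay = 0.0001f),
  new VectorType(NoCorruption, SquaredErr),
  DistBeliefCriteria(miniBatchFraction = 0.01f, validationSize = 20, submitInterval = 1.seconds,
    updateStep = 2, fetchStep = 10, numCores = 1, repartitionOnStart = true,
    storageLevel = StorageLevel.MEMORY_ONLY))
val trainer = new Trainer(style = style, stops = StoppingCriteria(maxIter = 100000))
// trainer.train(trainRDD, validRDD)  // RDD inputs are cached automatically (see "Training" below).
```

For a single-machine run, replace `DistBeliefTrainStyle`/`DistBeliefCriteria` with `SingleThreadTrainStyle`/`SimpleTrainingCriteria`, as shown in the Simple Example below.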
41 | 42 | ## Activation Function 43 | 44 | ScalaNetwork supports the following activation functions: 45 | 46 | * Linear 47 | * Sigmoid 48 | * HyperbolicTangent 49 | * Rectifier 50 | * Softplus 51 | * HardSigmoid 52 | * HardTanh 53 | * Softmax 54 | 55 | You can also build new activation functions from these using several operations (a short composition sketch appears at the end of the Network Creation section below). 56 | 57 | # Usage 58 | 59 | Here are some examples of basic usage. If you want to extend this package or use it in more detail, please refer to the [ScalaDoc](http://nearbydelta.github.io/ScalaNetwork/api/#kr.ac.kaist.ir.deep.package) 60 | 61 | ## Download 62 | 63 | Currently ScalaNetwork supports Scala versions 2.10 ~ 2.11. 64 | 65 | * The stable release is 1.3.0 66 | 67 | If you are using SBT, add a dependency as described below: 68 | 69 | ```scala 70 | libraryDependencies += "kr.ac.kaist.ir" %% "scalanetwork" % "1.3.0" 71 | ``` 72 | 73 | If you are using Maven, add a dependency as described below: 74 | ```xml 75 | <dependency> 76 | <groupId>kr.ac.kaist.ir</groupId> 77 | <artifactId>scalanetwork_${your.scala.version}</artifactId> 78 | <version>1.3.0</version> 79 | </dependency> 80 | ``` 81 | 82 | ## Simple Example 83 | `Network.apply(Activation, Int*)` generates a fully-connected network: 84 | 85 | ```scala 86 | // Define 2 -> 4 -> 1 Layered, Fully connected network. 87 | val net = Network(Sigmoid, 2, 4, 1) 88 | // Define Manipulation Type. VectorType, AEType, RAEType, StandardRAEType, URAEType, and StringToVectorType. 89 | val operation = new VectorType( 90 | corrupt = GaussianCorruption(variance = 0.1) 91 | ) 92 | // Define Training Style. SingleThreadTrainStyle, MultiThreadTrainStyle, & DistBeliefTrainStyle 93 | val style = new SingleThreadTrainStyle( 94 | net = net, 95 | algorithm = new StochasticGradientDescent(l2decay = 0.0001f), 96 | make = operation, 97 | param = SimpleTrainingCriteria(miniBatchFraction = 0.01f)) 98 | // Define Trainer 99 | val train = new Trainer( 100 | style = style, 101 | stops = StoppingCriteria(maxIter = 100000)) 102 | // Do Train 103 | train.train(set, valid) 104 | ``` 105 | 106 | ## Network Creation 107 | 108 | To create a network, you can choose one of the following: 109 | 110 | * Simplest: using the sugar syntax `Network.apply` 111 | 112 | ```scala 113 | // Network(Activation, SizeOfLayer1, SizeOfLayer2, SizeOfLayer3, ...) 114 | Network(Sigmoid, 2, 4, 1) 115 | Network(HyperbolicTangent, 4, 10, 7) 116 | Network(Rectifier, 30, 10, 5) 117 | Network(Softplus, 100, 50, 30, 10, 1) 118 | ``` 119 | 120 | * If you want different activation functions for each layer, 121 | 122 | ```scala 123 | val layer1 = new BasicLayer(10 -> 7, Sigmoid) 124 | val layer2 = new SplitTensorLayer((3, 4) -> 2, Rectifier) 125 | new BasicNetwork(Seq(layer1, layer2), 0.95) 126 | ``` 127 | 128 | The second argument of BasicNetwork indicates the presence probability, 129 | i.e. 1 - (neuron drop-out probability for drop-out training). The default is 1. 130 | 131 | * If you want a single-layer AutoEncoder, 132 | 133 | ```scala 134 | val layer = new ReconBasicLayer(10 -> 7, Sigmoid) 135 | new AutoEncoder(layer, 0.95) 136 | ``` 137 | 138 | AutoEncoder only accepts layers of the `Reconstructable` type. Currently, `ReconBasicLayer` is the only supported one. 139 | (A tensor layer version is planned.) 140 | 141 | * If you want to stack autoencoders, 142 | 143 | ```scala 144 | val net1 = new AutoEncoder(...) 145 | val net2 = new AutoEncoder(...) 146 | new StackedAutoEncoder(Seq(net1, net2)) 147 | ``` 148 | 149 | Note that StackedAutoEncoder does not take a presence probability.
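As mentioned in the Activation Function section above, new activation functions can be composed from the built-in ones. The sketch below is based on the scale (`*`), translate (`+`) and add (`+`) operations defined by the `ActivationOp` implicit class in `kr.ac.kaist.ir.deep.fn` (its source appears later in this file); treat it as an illustration of those operations rather than additional documented API.

```scala
import kr.ac.kaist.ir.deep.fn._

// Scale: g(x) = sY * f(sX * x). Here, g(x) = 0.5 * tanh(2x).
val scaledTanh = HyperbolicTangent * (2.0f, 0.5f)
// Translate: g(x) = dY + f(x - dX). Here, g(x) = sigmoid(x - 1) - 0.5.
val shiftedSigmoid = Sigmoid + (1.0f, -0.5f)
// Add: g(x) = f1(x) + f2(x). Here, g(x) = rectifier(x) + softplus(x).
val combined = Rectifier + Softplus

// The composed function is an ordinary Activation and can be used in any layer:
val layer = new BasicLayer(10 -> 7, scaledTanh)
```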
150 | 151 | ## Training 152 | 153 | ### Algorithm & Training Criteria 154 | Before choosing a Training Style, you must specify the algorithm and training criteria. 155 | 156 | ```scala 157 | /* Algorithms */ 158 | new StochasticGradientDescent(rate=0.8, l1decay=0.0, l2decay=0.0001, momentum=0.0001) 159 | new AdaGrad(rate=0.6, l1decay=0.0, l2decay=0.0001) 160 | new AdaDelta(l1decay=0.0, l2decay=0.0001, decay=0.95, epsilon=1e-6) 161 | ``` 162 | ```scala 163 | /* Training Criteria */ 164 | import scala.concurrent.duration._ 165 | SimpleTrainingCriteria(miniBatchFraction=0.01f, validationSize=20) 166 | DistBeliefCriteria(miniBatchFraction=0.01f, validationSize=20, submitInterval=1.seconds, 167 | updateStep=2, fetchStep=10, numCores=1, repartitionOnStart = true, storageLevel = StorageLevel.MEMORY_ONLY) 168 | ``` 169 | 170 | Validation size sets the number of elements used for the validation phase. 171 | 172 | ### Input Options 173 | You can also specify input operations or options. 174 | 175 | ```scala 176 | /* Corruptions */ 177 | NoCorruption 178 | DroppingCorruption(presence=0.95) 179 | GaussianCorruption(mean=0, variance=0.1) 180 | ``` 181 | ```scala 182 | /* Objective Functions */ 183 | SquaredErr 184 | CrossEntropyErr // Which is Logistic Err 185 | ``` 186 | ```scala 187 | /* Manipulation Type : Vector input, Vector output */ 188 | // General Neural Network type 189 | new VectorType(corrupt, objective) 190 | // General AutoEncoder type 191 | new AEType(corrupt, objective) 192 | 193 | /* Manipulation Type : Tree input, Null output (AutoEncoder) */ 194 | // Train the network in RAE style. 195 | // Every internal node is regarded as reconstructing its direct children (not all leaves). 196 | new RAEType(corrupt, objective) 197 | new StandardRAEType(corrupt, objective) 198 | // Experimental: Train the network in URAE style. 199 | // With the same structure, the network should reconstruct all leaves from the root. 200 | new URAEType(corrupt, objective) 201 | 202 | /* Manipulation Type : String input, Vector output */ 203 | new StringToVectorType(model, objective) 204 | ``` 205 | 206 | ### Training Style 207 | You can choose the training style of the network. 208 | 209 | ```scala 210 | /* Styles */ 211 | new SingleThreadTrainStyle(net, algorithm, make:ManipulationType, param) 212 | new MultiThreadTrainStyle(net, sparkContext, algorithm, make:ManipulationType, param:DistBeliefCriteria) 213 | new DistBeliefTrainStyle(net, sparkContext, algorithm, make:ManipulationType, param:DistBeliefCriteria) 214 | ``` 215 | 216 | ### Training 217 | Training is done by the `Trainer` class. 218 | 219 | ```scala 220 | /* Stopping Criteria */ 221 | StoppingCriteria(maxIter = 100000, waitAfterUpdate=2, 222 | improveThreshold=0.95, lossThreshold=1e-4, validationFreq=1.0f) 223 | 224 | /* Trainer */ 225 | new Trainer(style = style, stops = StoppingCriteria(), name = "Trainer") 226 | ``` 227 | 228 | * **waitAfterUpdate** indicates the waiting time after an improvement. If the network output improved on the 100-th iteration, 229 | the trainer waits until `Max(validationEpoch, 100 * patienceStep)`. 230 | * **Improve Threshold** indicates the bottom line for improvement. 231 | To be regarded as improved, the loss should be less than (best loss) * improveThreshold. 232 | * **Loss threshold** indicates the maximum loss that can be accepted. 233 | * **Validation Frequency** sets the number of iterations between validations. (One iteration trains all training examples.) 234 | 235 | Training is done by the `train` method.
236 | 237 | ```scala 238 | // If training and validation set are the same 239 | trainer.train(Seq[(IN, OUT)]) 240 | trainer.train(Int => Seq[(IN, OUT)]) // With generator. 241 | 242 | // If they are different 243 | trainer.train(Seq[(IN, OUT)], Seq[(IN, OUT)]) 244 | trainer.train(Int => Seq[(IN, OUT)], Int => Seq[(IN, OUT)]) 245 | 246 | // If you are using RDD 247 | trainer.train(RDD[(IN, OUT)]) 248 | trainer.train(RDD[(IN, OUT)], RDD[(IN, OUT)]) 249 | ``` 250 | 251 | If you are using RDD, ScalaNetwork automatically caches your input sequence. 252 | 253 | Also you can add negative examples, using `trainer.setNegativeTrainingReference()` 254 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | organization := "kr.ac.kaist.ir" 2 | 3 | name := "ScalaNetwork" 4 | 5 | version := "1.3.0-local" 6 | 7 | scalaVersion := "2.10.4" 8 | 9 | scalacOptions += "-target:jvm-1.7" 10 | 11 | crossScalaVersions := Seq("2.10.4", "2.11.4") 12 | 13 | resolvers ++= Seq("snapshots", "releases").map(Resolver.sonatypeRepo) 14 | 15 | resolvers ++= Seq( 16 | "Typesafe Releases" at "http://repo.typesafe.com/typesafe/releases/" 17 | ) 18 | 19 | libraryDependencies ++= Seq( 20 | "org.apache.spark" %% "spark-core" % "1.5.0", 21 | "com.typesafe.play" %% "play-json" % "2.3.4", 22 | "org.scalanlp" %% "breeze" % "0.11.2" 23 | ) 24 | 25 | scalacOptions in Test ++= Seq("-Yrangepos") 26 | 27 | licenses := Seq("GNU General Public License v2" → url("http://www.gnu.org/licenses/gpl-2.0.html")) 28 | 29 | homepage := Some(url("http://nearbydelta.github.io/ScalaNetwork")) 30 | 31 | publishTo <<= version { v: String ⇒ 32 | val nexus = "https://oss.sonatype.org/" 33 | if (v.trim.endsWith("SNAPSHOT")) 34 | Some("snapshots" at nexus + "content/repositories/snapshots") 35 | else 36 | Some("releases" at nexus + "service/local/staging/deploy/maven2") 37 | } 38 | 39 | publishMavenStyle := true 40 | 41 | publishArtifact in Test := false 42 | 43 | pomIncludeRepository := { x ⇒ false} 44 | 45 | pomExtra := 46 | 47 | git@github.com:nearbydelta/ScalaNetwork.git 48 | scm:git:git@github.com:nearbydelta/ScalaNetwork.git 49 | 50 | 51 | 52 | nearbydelta 53 | Bugeun Kim 54 | http://bydelta.kr 55 | 56 | 57 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.5 -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn 2 | 3 | addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0") -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/field/package.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep 2 | 3 | import kr.ac.kaist.ir.deep.fn.Scalar 4 | 5 | /** 6 | * Created by bydelta on 15. 4. 24. 
7 | */ 8 | package object field { 9 | type Label = Int 10 | type VertexFeature[IN] = (Label, IN) ⇒ Scalar 11 | type EdgeFeature[IN] = (Label, Label, IN) ⇒ Scalar 12 | } 13 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/fn/Activation.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.fn 2 | 3 | import breeze.linalg.{DenseMatrix, diag, sum} 4 | import breeze.numerics._ 5 | import play.api.libs.json.{JsObject, JsValue, Json} 6 | 7 | import scala.annotation.tailrec 8 | import scala.reflect.runtime._ 9 | 10 | /** 11 | * __Trait__ that describes an activation function for '''each layer''' 12 | * 13 | * Because these activation functions can be shared, we recommend to make inherited one as an object. 14 | */ 15 | trait Activation extends (ScalarMatrix ⇒ ScalarMatrix) with Serializable { 16 | /** 17 | * Compute differentiation value of this function at `f(x) = fx` 18 | * 19 | * @param fx the __output__ of this function 20 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 21 | */ 22 | def derivative(fx: ScalarMatrix): ScalarMatrix 23 | 24 | /** 25 | * Compute mapping for `x` 26 | * 27 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 28 | * @return value of `f(x)` 29 | */ 30 | def apply(x: ScalarMatrix): ScalarMatrix 31 | 32 | /** 33 | * Serialize Activation function into String. 34 | * @note If this is an "object", do not modify this function. 35 | * This does not supports Activation Operations defined outside of this package. 36 | * @return JSON object states this function 37 | */ 38 | def toJSON: JsObject = Json.obj("class" → this.getClass.getCanonicalName) 39 | 40 | /** 41 | * Initialize the weight matrix 42 | * 43 | * @param fanIn the number of __fan-in__ ''i.e. the number of neurons in previous layer'' 44 | * @param fanOut the number of __fan-out__ ''i.e. the number of neurons in next layer'' 45 | * @param rows the number of __rows of resulting matrix__ `(Default 0)` 46 | * @param cols the number of __cols of resulting matrix__ `(Default 0)` 47 | * @return the initialized weight matrix 48 | */ 49 | def initialize(fanIn: Int, fanOut: Int, rows: Int = 0, cols: Int = 0): ScalarMatrix = { 50 | val pmMatx: ScalarMatrix = ScalarMatrix.of(if (rows > 0) rows else fanOut, if (cols > 0) cols else fanIn) :* 1e-2f 51 | pmMatx :+ 1e-2f 52 | } 53 | } 54 | 55 | /** 56 | * Companion Object of Activation. 57 | */ 58 | object Activation { 59 | @transient val runtimeMirror = universe.synchronized(universe.runtimeMirror(getClass.getClassLoader)) 60 | 61 | /** 62 | * Reconstruct Activation function from given JSON value. 63 | * @param obj JSON value to be reconstructed 64 | * @return Activation reconstructed from JSON 65 | */ 66 | def apply(obj: JsValue): Activation = { 67 | (obj \ "function").asOpt[String] match { 68 | case Some("scale") ⇒ 69 | val base = apply(obj \ "base") 70 | val x = (obj \ "X").as[Float] 71 | val y = (obj \ "Y").as[Float] 72 | base *(x, y) 73 | case Some("translation") ⇒ 74 | val base = apply(obj \ "base") 75 | val x = (obj \ "X").as[Float] 76 | val y = (obj \ "Y").as[Float] 77 | base +(x, y) 78 | case Some("add") ⇒ 79 | val base = apply(obj \ "base") 80 | val args = (obj \ "args").as[Seq[JsValue]].map(apply) 81 | base.+(args: _*) 82 | case _ ⇒ 83 | val str = (obj \ "class").asOpt[String] match { 84 | case Some(x) ⇒ x 85 | case _ ⇒ "kr.ac.kaist.ir.deep.fn." 
+ obj.as[String] 86 | } 87 | universe.synchronized { 88 | val module = runtimeMirror.staticModule(str) 89 | runtimeMirror.reflectModule(module).instance.asInstanceOf[Activation] 90 | } 91 | } 92 | } 93 | } 94 | 95 | /** 96 | * __Activation Function__: Hard version of Sigmoid 97 | * 98 | * @note `sigmoid(x) = 1 / [exp(-x) + 1]`, hard version approximates tanh as piecewise linear function 99 | * (derived from relationship between tanh & sigmoid, and tanh & hard tanh.) 100 | * We assumed the input of activation is a row vector. 101 | * @example 102 | * {{{val fx = HardSigmoid(0.0) 103 | * val diff = HardSigmoid.derivative(fx) }}} 104 | */ 105 | object HardSigmoid extends Activation { 106 | /** 107 | * Compute differentiation value of this function at `f(x) = fx` 108 | * 109 | * @param fx the __output__ of this function 110 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 111 | */ 112 | override def derivative(fx: ScalarMatrix): ScalarMatrix = { 113 | // Because fx is n by 1 matrix, generate n by n matrix 114 | val res = ScalarMatrix $0(fx.rows, fx.rows) 115 | // Output is diagonal matrix, with dfi(xi)/dxi. 116 | derivCoord(fx, res, fx.rows - 1) 117 | } 118 | 119 | /** 120 | * Compute mapping for `x` 121 | * 122 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 123 | * @return value of `f(x)` 124 | */ 125 | override def apply(x: ScalarMatrix): ScalarMatrix = { 126 | val res = ScalarMatrix $0(x.rows, x.cols) 127 | applyCoord(x, res, x.rows - 1, x.cols - 1) 128 | } 129 | 130 | @tailrec 131 | private def derivCoord(fx: ScalarMatrix, res: ScalarMatrix, r: Int): ScalarMatrix = 132 | if (r >= 0) { 133 | val x = fx(r, 0) 134 | if (x > 0.0f && x < 1.0f) 135 | res.update(r, r, 0.25f) 136 | // else res.update((r, r), 0.0f) [Already initialized as zero] 137 | derivCoord(fx, res, r - 1) 138 | } else 139 | res 140 | 141 | @tailrec 142 | private def applyCoord(x: ScalarMatrix, res: ScalarMatrix, r: Int, c: Int): ScalarMatrix = 143 | if (r >= 0) { 144 | val v = x(r, c) 145 | // if (v < -2) res.update(r, c, 0.0f) [Already initailized as zero] 146 | if (v > 2) res.update(r, c, 1.0f) 147 | else res.update(r, c, 0.25f * v + 0.5f) 148 | 149 | if (c > 0) 150 | applyCoord(x, res, r, c - 1) 151 | else 152 | applyCoord(x, res, r - 1, x.cols - 1) 153 | } else 154 | res 155 | } 156 | 157 | /** 158 | * __Activation Function__: Hard version of Tanh (Hyperbolic Tangent) 159 | * 160 | * @note `tanh(x) = sinh(x) / cosh(x)`, hard version approximates tanh as piecewise linear function. 161 | * We assumed the input of activation is a row vector. 162 | * @example 163 | * {{{val fx = HardTanh(0.0) 164 | * val diff = HardTanh.derivative(fx) }}} 165 | */ 166 | object HardTanh extends Activation { 167 | /** 168 | * Compute differentiation value of this function at `f(x) = fx` 169 | * 170 | * @param fx the __output__ of this function 171 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 172 | */ 173 | override def derivative(fx: ScalarMatrix): ScalarMatrix = { 174 | // Because fx is n by 1 matrix, generate n by n matrix 175 | val res = ScalarMatrix $0(fx.rows, fx.rows) 176 | // Output is diagonal matrix, with dfi(xi)/dxi. 177 | derivCoord(fx, res, fx.rows - 1) 178 | } 179 | 180 | /** 181 | * Compute mapping for `x` 182 | * 183 | * @param x the __input__ matrix. 
''Before application, input should be summed already.'' 184 | * @return value of `f(x)` 185 | */ 186 | override def apply(x: ScalarMatrix): ScalarMatrix = { 187 | val res = x.copy 188 | applyCoord(x, res, x.rows - 1, x.cols - 1) 189 | } 190 | 191 | @tailrec 192 | private def derivCoord(fx: ScalarMatrix, res: ScalarMatrix, r: Int): ScalarMatrix = 193 | if (r >= 0) { 194 | val x = fx(r, 0) 195 | if (x < 1.0f && x > -1.0f) 196 | res.update(r, r, 1.0f) 197 | // else res.update(r, r, 0.0f) [Already initalized as zero] 198 | derivCoord(fx, res, r - 1) 199 | } else 200 | res 201 | 202 | @tailrec 203 | private def applyCoord(x: ScalarMatrix, res: ScalarMatrix, r: Int, c: Int): ScalarMatrix = 204 | if (r >= 0) { 205 | val v = x(r, c) 206 | if (v < -1) res.update(r, c, -1.0f) 207 | else if (v > 1) res.update(r, c, 1.0f) 208 | 209 | if (c > 0) 210 | applyCoord(x, res, r, c - 1) 211 | else 212 | applyCoord(x, res, r - 1, x.cols - 1) 213 | } else 214 | res 215 | } 216 | 217 | /** 218 | * __Activation Function__: Tanh (Hyperbolic Tangent) 219 | * 220 | * @note `tanh(x) = sinh(x) / cosh(x)` 221 | * We assumed the input of activation is a row vector. 222 | * @example 223 | * {{{val fx = HyperbolicTangent(0.0) 224 | * val diff = HyperbolicTangent.derivative(fx) }}} 225 | */ 226 | object HyperbolicTangent extends Activation { 227 | /** 228 | * Compute differentiation value of this function at `f(x) = fx` 229 | * 230 | * @param fx the __output__ of this function 231 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 232 | */ 233 | override def derivative(fx: ScalarMatrix): ScalarMatrix = { 234 | // Output is diagonal matrix, with dfi(xi)/dxi. 235 | val dVec: ScalarMatrix = 1.0f - (fx :* fx) 236 | diag(dVec.toDenseVector) 237 | } 238 | 239 | /** 240 | * Compute mapping for `x` 241 | * 242 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 243 | * @return value of `f(x)` 244 | */ 245 | override def apply(x: ScalarMatrix): ScalarMatrix = tanh(x) 246 | 247 | /** 248 | * Initialize the weight matrix 249 | * 250 | * @param fanIn the number of __fan-in__ ''i.e. the number of neurons in previous layer'' 251 | * @param fanOut the number of __fan-out__ ''i.e. the number of neurons in next layer'' 252 | * @param rows the number of __rows of resulting matrix__ `(Default 0)` 253 | * @param cols the number of __cols of resulting matrix__ `(Default 0)` 254 | * @return the initialized weight matrix 255 | */ 256 | override def initialize(fanIn: Int, fanOut: Int, rows: Int = 0, cols: Int = 0): ScalarMatrix = { 257 | val range = Math.sqrt(6.0 / (fanIn + fanOut)).toFloat 258 | val pmMatx: ScalarMatrix = ScalarMatrix.of(if (rows > 0) rows else fanOut, if (cols > 0) cols else fanIn) :- 0.5f 259 | pmMatx :* (2.0f * range) 260 | } 261 | } 262 | 263 | /** 264 | * __Activation Function__: Linear 265 | * 266 | * @note `linear(x) = x` 267 | * We assumed the input of activation is a row vector. 
268 | * @example 269 | * {{{val fx = Linear(0.0) 270 | * val diff = Linear.derivative(fx)}}} 271 | */ 272 | object Linear extends Activation { 273 | /** 274 | * Compute differentiation value of this function at `f(x) = fx` 275 | * 276 | * @param fx the __output__ of this function 277 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 278 | */ 279 | override def derivative(fx: ScalarMatrix): ScalarMatrix = DenseMatrix.eye[Scalar](fx.rows) 280 | 281 | /** 282 | * Compute mapping for `x` 283 | * 284 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 285 | * @return value of `f(x)` 286 | */ 287 | override def apply(x: ScalarMatrix): ScalarMatrix = x.copy 288 | } 289 | 290 | /** 291 | * __Activation Function__: Rectifier 292 | * 293 | * @note `rectifier(x) = x if x > 0, otherwise 0` 294 | * We assumed the input of activation is a row vector. 295 | * @example 296 | * {{{val fx = Rectifier(0.0) 297 | * val diff = Rectifier.derivative(fx)}}} 298 | */ 299 | object Rectifier extends Activation { 300 | /** 301 | * Compute differentiation value of this function at `f(x) = fx` 302 | * 303 | * @param fx the __output__ of this function 304 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 305 | */ 306 | override def derivative(fx: ScalarMatrix): ScalarMatrix = { 307 | // Because fx is n by 1 matrix, generate n by n matrix 308 | val res = ScalarMatrix $0(fx.rows, fx.rows) 309 | // Output is diagonal matrix, with dfi(xi)/dxi. 310 | derivCoord(fx, res, fx.rows - 1) 311 | } 312 | 313 | /** 314 | * Compute mapping for `x` 315 | * 316 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 317 | * @return value of `f(x)` 318 | */ 319 | override def apply(x: ScalarMatrix): ScalarMatrix = { 320 | val res = x.copy 321 | applyCoord(x, res, x.rows - 1, x.cols - 1) 322 | } 323 | 324 | @tailrec 325 | private def derivCoord(fx: ScalarMatrix, res: ScalarMatrix, r: Int): ScalarMatrix = 326 | if (r >= 0) { 327 | val x = fx(r, 0) 328 | if (x > 0) 329 | res.update(r, r, 1.0f) 330 | //else res.update(r, r, 0.0f) [Already Initialized as zero] 331 | derivCoord(fx, res, r - 1) 332 | } else 333 | res 334 | 335 | @tailrec 336 | private def applyCoord(x: ScalarMatrix, res: DenseMatrix[Scalar], r: Int, c: Int): ScalarMatrix = 337 | if (r >= 0) { 338 | if (x(r, c) < 0) res.update(r, c, 0.0f) 339 | 340 | if (c > 0) applyCoord(x, res, r, c - 1) 341 | else applyCoord(x, res, r - 1, x.cols - 1) 342 | } else 343 | res 344 | } 345 | 346 | /** 347 | * __Activation Function__: Sigmoid function 348 | * 349 | * @note {{{sigmoid(x) = 1 / [exp(-x) + 1]}}} 350 | * We assumed the input of activation is a row vector. 351 | * @example 352 | * {{{val fx = Sigmoid(0.0) 353 | * val diff = Sigmoid.derivative(fx)}}} 354 | */ 355 | object Sigmoid extends Activation { 356 | /** 357 | * Compute differentiation value of this function at `f(x) = fx` 358 | * 359 | * @param fx the __output__ of this function 360 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 361 | */ 362 | override def derivative(fx: ScalarMatrix): ScalarMatrix = { 363 | // Output is diagonal matrix, with dfi(xi)/dxi. 364 | val dVec: ScalarMatrix = (1.0f - fx) :* fx 365 | diag(dVec.toDenseVector) 366 | } 367 | 368 | /** 369 | * Compute mapping for `x` 370 | * 371 | * @param x the __input__ matrix. 
''Before application, input should be summed already.'' 372 | * @return value of `f(x)` 373 | */ 374 | override def apply(x: ScalarMatrix): ScalarMatrix = { 375 | val expv: ScalarMatrix = exp(x) 376 | val exp1: ScalarMatrix = expv :+ 1.0f 377 | 1.0f / exp1 378 | } 379 | 380 | /** 381 | * Initialize the weight matrix 382 | * 383 | * @param fanIn the number of __fan-in__ ''i.e. the number of neurons in previous layer'' 384 | * @param fanOut the number of __fan-out__ ''i.e. the number of neurons in next layer'' 385 | * @param rows the number of __rows of resulting matrix__ `(Default 0)` 386 | * @param cols the number of __cols of resulting matrix__ `(Default 0)` 387 | * @return the initialized weight matrix 388 | */ 389 | override def initialize(fanIn: Int, fanOut: Int, rows: Int = 0, cols: Int = 0): ScalarMatrix = { 390 | val range = (Math.sqrt(6.0 / (fanIn + fanOut)) * 4.0).toFloat 391 | val pmMatx: ScalarMatrix = ScalarMatrix.of(if (rows > 0) rows else fanOut, if (cols > 0) cols else fanIn) :- 0.5f 392 | pmMatx :* (2.0f * range) 393 | } 394 | } 395 | 396 | /** 397 | * __Activation Function__: Softmax function 398 | * 399 | * @note {{{softmax(x)_i = exp(x_i) / sum(exp(x_i))}}} 400 | * We assumed the input of activation is a row vector. 401 | * @example 402 | * {{{val fx = Softmax(0.0) 403 | * val diff = Softmax.derivative(fx)}}} 404 | */ 405 | object Softmax extends Activation { 406 | /** 407 | * Compute differentiation value of this function at `f(x) = fx` 408 | * 409 | * @param fx the __output__ of this function 410 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 411 | */ 412 | override def derivative(fx: ScalarMatrix): ScalarMatrix = { 413 | val res: ScalarMatrix = ScalarMatrix $0(fx.rows, fx.rows) 414 | 415 | // Note that (i, j)-entry of deriviative is dF_i / dX_j 416 | // and dF_i / dX_j = F(i) * (Delta_ij - F(j)). 417 | initDeriv(fx, res, fx.rows - 1) 418 | derivCoord(fx, res, res.rows - 1, res.cols - 1) 419 | } 420 | 421 | /** 422 | * Compute mapping for `x` 423 | * 424 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 425 | * @return value of `f(x)` 426 | */ 427 | override def apply(x: ScalarMatrix): ScalarMatrix = { 428 | val expv: ScalarMatrix = exp(x) 429 | val normalize: Scalar = sum(expv) 430 | expv :/= normalize 431 | } 432 | 433 | /** 434 | * Initialize the weight matrix 435 | * 436 | * @param fanIn the number of __fan-in__ ''i.e. the number of neurons in previous layer'' 437 | * @param fanOut the number of __fan-out__ ''i.e. 
the number of neurons in next layer'' 438 | * @param rows the number of __rows of resulting matrix__ `(Default 0)` 439 | * @param cols the number of __cols of resulting matrix__ `(Default 0)` 440 | * @return the initialized weight matrix 441 | */ 442 | override def initialize(fanIn: Int, fanOut: Int, rows: Int = 0, cols: Int = 0): ScalarMatrix = { 443 | val range = (Math.sqrt(6.0 / (fanIn + fanOut)) * 4.0).toFloat 444 | val pmMatx: ScalarMatrix = ScalarMatrix.of(if (rows > 0) rows else fanOut, if (cols > 0) cols else fanIn) :- 0.5f 445 | pmMatx :* (2.0f * range) 446 | } 447 | 448 | @tailrec 449 | private def initDeriv(fx: ScalarMatrix, res: ScalarMatrix, r: Int): Unit = 450 | if (r >= 0) { 451 | res(r, ::) := fx(r, 0) 452 | initDeriv(fx, res, r - 1) 453 | } 454 | 455 | @tailrec 456 | private def derivCoord(fx: ScalarMatrix, res: ScalarMatrix, r: Int, c: Int): ScalarMatrix = 457 | if (r >= 0) { 458 | val dfdx = (if (r == c) 1 else 0) - fx(c, 0) 459 | res.update(r, c, res(r, c) * dfdx) 460 | 461 | if (c > 0) derivCoord(fx, res, r, c - 1) 462 | else derivCoord(fx, res, r - 1, fx.rows - 1) 463 | } else 464 | res 465 | } 466 | 467 | /** 468 | * __Activation Function__: Softplus 469 | * 470 | * @note `softplus(x) = log[1 + exp(x)]` 471 | * We assumed the input of activation is a row vector. 472 | * @example 473 | * {{{val fx = Softplus(0.0) 474 | * val diff = Softplus.derivative(fx)}}} 475 | */ 476 | object Softplus extends Activation { 477 | /** 478 | * Compute differentiation value of this function at `f(x) = fx` 479 | * 480 | * @param fx the __output__ of this function 481 | * @return differentiation value at `f(x) = fx`, which should be an __square, symmetric matrix__ 482 | */ 483 | override def derivative(fx: ScalarMatrix): ScalarMatrix = { 484 | // Output is diagonal matrix, with dfi(xi)/dxi. 485 | val expv: ScalarMatrix = exp(fx) 486 | val exp1: ScalarMatrix = expv - 1.0f 487 | val dVec: ScalarMatrix = exp1 / expv 488 | diag(dVec.toDenseVector) 489 | } 490 | 491 | /** 492 | * Compute mapping for `x` 493 | * 494 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 495 | * @return value of `f(x)` 496 | */ 497 | override def apply(x: ScalarMatrix): ScalarMatrix = { 498 | val expx = exp(x) 499 | val plus1 = expx :+ 1.0f 500 | log(plus1) 501 | } 502 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/fn/Objective.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.fn 2 | 3 | import breeze.linalg.{DenseMatrix, sum} 4 | import breeze.numerics._ 5 | 6 | /** 7 | * __Trait__ that describes an objective function for '''entire network''' 8 | * 9 | * Because these objective functions can be shared, we recommend to make inherited one as an object. 
10 | */ 11 | trait Objective extends ((ScalarMatrix, ScalarMatrix) ⇒ Scalar) with Serializable { 12 | /** 13 | * Compute differentiation value of this objective function at `x = r - o` 14 | * 15 | * @param real the expected __real output__, `r` 16 | * @param output the computed __output of the network__, `o` 17 | * @return differentiation value at `f(x)=fx`, which is __a column vector__ 18 | */ 19 | def derivative(real: ScalarMatrix, output: ScalarMatrix): ScalarMatrix 20 | 21 | /** 22 | * Compute error (loss) 23 | * 24 | * @param real the expected __real output__ 25 | * @param output the computed __output of the network__ 26 | * @return the error 27 | */ 28 | override def apply(real: ScalarMatrix, output: ScalarMatrix): Scalar 29 | } 30 | 31 | 32 | /** 33 | * __Objective Function__: Cosine Similarity Error 34 | * 35 | * This has a heavy computation. If you want to use lighter one, use [[DotProductErr]] 36 | * 37 | * @note This function returns 1 - cosine similarity, i.e. cosine dissimiarlity. 38 | * 39 | * @example 40 | * {{{val output = net(input) 41 | * val err = CosineErr(real, output) 42 | * val diff = CosineErr.derivative(real, output) 43 | * }}} 44 | */ 45 | object CosineErr extends Objective { 46 | /** 47 | * Compute differentiation value of this objective function w.r.t output o 48 | * 49 | * @param real the expected __real output__, `r` 50 | * @param output the computed __output of the network__, `o` 51 | * @return differentiation value at `f(x)=fx`, which is __a column vector__ 52 | */ 53 | override def derivative(real: ScalarMatrix, output: ScalarMatrix): ScalarMatrix = { 54 | val dotValue: Scalar = sum(real :* output) 55 | 56 | val lenReal = len(real) 57 | val lenOut = len(output) 58 | val lenOutSq = lenOut * lenOut 59 | // Denominator of derivative is len(real) * len(output)^3 60 | val denominator = lenReal * lenOut * lenOutSq 61 | 62 | DenseMatrix.tabulate(output.rows, output.cols) { 63 | (r, c) ⇒ 64 | val x = output(r, c) 65 | val a = real(r, c) 66 | // The nominator of derivative of cosine similarity is, 67 | // a(lenOut^2 - x^2) - x(dot - a*x) 68 | // = a*lenOut^2 - x*dot 69 | val nominator = a * lenOutSq - x * dotValue 70 | 71 | // We need derivative of 1 - cosine. 
72 | -(nominator / denominator) 73 | } 74 | } 75 | 76 | /** 77 | * Compute error (loss) 78 | * 79 | * @param real the expected __real output__ 80 | * @param output the computed __output of the network__ 81 | * @return the error 82 | */ 83 | override def apply(real: ScalarMatrix, output: ScalarMatrix): Scalar = { 84 | val norm = len(real) * len(output) 85 | val dotValue: Scalar = sum(real :* output) 86 | 1.0f - (dotValue / norm) 87 | } 88 | 89 | /** 90 | * length of given matrix 91 | * @param matrix matrix 92 | * @return length = sqrt(sum(pow(:, 2))) 93 | */ 94 | private def len(matrix: ScalarMatrix): Scalar = { 95 | Math.sqrt(sum(pow(matrix, 2.0f))).toFloat 96 | } 97 | } 98 | 99 | /** 100 | * __Objective Function__: Sum of Cross-Entropy (Logistic) 101 | * 102 | * @note This objective function prefer 0/1 output 103 | * @example 104 | * {{{val output = net(input) 105 | * val err = CrossEntropyErr(real, output) 106 | * val diff = CrossEntropyErr.derivative(real, output) 107 | * }}} 108 | */ 109 | object CrossEntropyErr extends Objective { 110 | /** 111 | * Entropy function 112 | */ 113 | val entropy = (r: Scalar, o: Scalar) ⇒ 114 | (if (r != 0.0f) -r * Math.log(o).toFloat else 0.0f) + (if (r != 1.0f) -(1.0f - r) * Math.log(1.0f - o).toFloat else 0.0f) 115 | 116 | /** 117 | * Derivative of Entropy function 118 | */ 119 | val entropyDiff = (r: Scalar, o: Scalar) ⇒ (r - o) / (o * (o - 1.0f)) 120 | 121 | /** 122 | * Compute differentiation value of this objective function w.r.t output o 123 | * 124 | * @param real the expected __real output__, `r` 125 | * @param output the computed __output of the network__, `o` 126 | * @return differentiation value at `f(x)=fx`, which is __a column vector__ 127 | */ 128 | override def derivative(real: ScalarMatrix, output: ScalarMatrix): ScalarMatrix = 129 | DenseMatrix.tabulate(real.rows, real.cols)((r, c) ⇒ entropyDiff(real(r, c), output(r, c))) 130 | 131 | /** 132 | * Compute error (loss) 133 | * 134 | * @param real the expected __real output__ 135 | * @param output the computed __output of the network__ 136 | * @return the error 137 | */ 138 | override def apply(real: ScalarMatrix, output: ScalarMatrix): Scalar = 139 | sum(DenseMatrix.tabulate(real.rows, real.cols)((r, c) ⇒ entropy(real(r, c), output(r, c)))) 140 | } 141 | 142 | /** 143 | * __Objective Function__: Dot-product Error 144 | * 145 | * @note This function computes additive inverse of dot product, i.e. dot-product dissimiarity. 
146 | * Also this function assumes all matrices between [-1, 1]^N^ 147 | * 148 | * @example 149 | * {{{val output = net(input) 150 | * val err = DotProductErr(real, output) 151 | * val diff = DotProductErr.derivative(real, output) 152 | * }}} 153 | */ 154 | object DotProductErr extends Objective { 155 | /** 156 | * Compute differentiation value of this objective function w.r.t output o 157 | * 158 | * @param real the expected __real output__, `r` 159 | * @param output the computed __output of the network__, `o` 160 | * @return differentiation value at `f(x)=fx`, which is __a column vector__ 161 | */ 162 | override def derivative(real: ScalarMatrix, output: ScalarMatrix): ScalarMatrix = -real 163 | 164 | /** 165 | * Compute error (loss) 166 | * 167 | * @param real the expected __real output__ 168 | * @param output the computed __output of the network__ 169 | * @return the error 170 | */ 171 | override def apply(real: ScalarMatrix, output: ScalarMatrix): Scalar = real.rows - sum(real :* output) 172 | } 173 | 174 | /** 175 | * __Objective Function__: Sum of Squared Error 176 | * 177 | * @example 178 | * {{{val output = net(input) 179 | * val err = SquaredErr(real, output) 180 | * val diff = SquaredErr.derivative(real, output) 181 | * }}} 182 | */ 183 | object SquaredErr extends Objective { 184 | /** 185 | * Compute differentiation value of this objective function w.r.t output o 186 | * 187 | * @param real the expected __real output__, `r` 188 | * @param output the computed __output of the network__, `o` 189 | * @return differentiation value at `f(x)=fx`, which is __a column vector__ 190 | */ 191 | override def derivative(real: ScalarMatrix, output: ScalarMatrix): ScalarMatrix = output - real 192 | 193 | /** 194 | * Compute error (loss) 195 | * 196 | * @param real the expected __real output__ 197 | * @param output the computed __output of the network__ 198 | * @return the error 199 | */ 200 | override def apply(real: ScalarMatrix, output: ScalarMatrix): Scalar = { 201 | val diff = real - output 202 | sum(pow(diff, 2.0f)) 203 | } 204 | } 205 | 206 | /** 207 | * __Objective Function__: Sum of Absolute Error 208 | * 209 | * @note In mathematics, L,,1,,-distance is called ''Manhattan distance.'' 210 | * 211 | * @example 212 | * {{{val output = net(input) 213 | * val err = ManhattanErr(real, output) 214 | * val diff = ManhattanErr.derivative(real, output) 215 | * }}} 216 | */ 217 | object ManhattanErr extends Objective { 218 | /** 219 | * Compute differentiation value of this objective function w.r.t output o 220 | * 221 | * @param real the expected __real output__, `r` 222 | * @param output the computed __output of the network__, `o` 223 | * @return differentiation value at `f(x)=fx`, which is __a column vector__ 224 | */ 225 | override def derivative(real: ScalarMatrix, output: ScalarMatrix): ScalarMatrix = 226 | DenseMatrix.tabulate(real.rows, real.cols) { 227 | (r, c) ⇒ 228 | val target = real(r, c) 229 | val x = output(r, c) 230 | if (target > x) 1.0f 231 | else if (target < x) -1.0f 232 | else 0.0f 233 | } 234 | 235 | /** 236 | * Compute error (loss) 237 | * 238 | * @param real the expected __real output__ 239 | * @param output the computed __output of the network__ 240 | * @return the error 241 | */ 242 | override def apply(real: ScalarMatrix, output: ScalarMatrix): Scalar = { 243 | val diff = real - output 244 | sum(abs(diff)) 245 | } 246 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/fn/ScalarMatrix.scala: 
-------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.fn 2 | 3 | import java.util.concurrent.ThreadLocalRandom 4 | 5 | import breeze.linalg.DenseMatrix 6 | 7 | /** 8 | * Companion Object of ScalarMatrix 9 | * 10 | * This object defines various shortcuts. 11 | */ 12 | object ScalarMatrix { 13 | /** 14 | * Generates full-one matrix of given size 15 | * 16 | * @param size __(#row, #col) pair__ of matrix size, such as (2, 3) 17 | * @return Matrix with initialized by one 18 | */ 19 | def $1(size: (Int, Int)): ScalarMatrix = DenseMatrix.ones[Scalar](size._1, size._2) 20 | 21 | /** 22 | * Generates full-random matrix of given size 23 | * 24 | * @param size __(#row, #col) pair__ of matrix size, such as (2, 3) 25 | * @return Matrix with initialized by random number 26 | */ 27 | def of(size: (Int, Int)): ScalarMatrix = 28 | DenseMatrix.tabulate[Scalar](size._1, size._2)((_, _) ⇒ ThreadLocalRandom.current().nextFloat()) 29 | 30 | /** 31 | * Generate full 0-1 matrix of given size. __Probability of 1's occurrence__ is given. 32 | * 33 | * @param pair __(#row, #col, probability)__ pair, where (#row, #col) indicates the matrix size, probability indicates the probability of 1's occurrence. 34 | * @return generated matrix 35 | */ 36 | def $01(pair: (Int, Int, Probability)): ScalarMatrix = 37 | DenseMatrix.tabulate[Scalar](pair._1, pair._2)( 38 | (_, _) ⇒ if (ThreadLocalRandom.current().nextFloat() > pair._3) 0.0f else 1.0f 39 | ) 40 | 41 | /** 42 | * Restore a matrix from JSON seq. 43 | * 44 | * @param arr 2D Sequence to be restored 45 | * @return restored matrix 46 | */ 47 | def restore(arr: IndexedSeq[IndexedSeq[String]]): ScalarMatrix = { 48 | val res = $0(arr.size, arr.head.size) 49 | var r = 0 50 | while (r < arr.size) { 51 | var c = 0 52 | while (c < arr(r).size) { 53 | res.update(r, c, arr(r)(c).toFloat) 54 | c += 1 55 | } 56 | r += 1 57 | } 58 | res 59 | } 60 | 61 | /** 62 | * Generates full-zero matrix of given size 63 | * 64 | * @param size __(#row, #col) pair__ of matrix size, such as (2, 3) 65 | * @return Matrix with initialized by zero 66 | */ 67 | def $0(size: (Int, Int)): ScalarMatrix = DenseMatrix.zeros[Scalar](size._1, size._2) 68 | 69 | /** 70 | * Make a column vector with given sequence. 71 | * 72 | * @param seq Sequence of entries, from (1,1) to (size, 1). 73 | * @return column vector with given sequence 74 | */ 75 | def apply(seq: Float*): ScalarMatrix = DenseMatrix.create(seq.size, 1, seq.toArray) 76 | 77 | /** 78 | * An empty matrix for RAE training. 79 | * 80 | * @return an empty matrix 81 | */ 82 | def empty: ScalarMatrix = ScalarMatrix $0(0, 0) 83 | } 84 | 85 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/fn/WeightUpdater.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.fn 2 | 3 | import breeze.linalg.sum 4 | import breeze.numerics._ 5 | 6 | import scala.collection.mutable.ArrayBuffer 7 | 8 | /** 9 | * __Trait__ that describes the algorithm for weight update 10 | * 11 | * Because each weight update requires history, we recommend to make inherited one as a class. 
12 | */ 13 | trait WeightUpdater extends ((IndexedSeq[ScalarMatrix], IndexedSeq[ScalarMatrix]) ⇒ Unit) with Serializable { 14 | /** Decay factor for L,,1,, regularization */ 15 | protected val l1decay: Scalar 16 | /** Decay factor for L,,2,, regularization */ 17 | protected val l2decay: Scalar 18 | 19 | /** 20 | * Execute the algorithm for given __sequence of Δweight__ and sequence of __weights__ 21 | * 22 | * @param delta the __sequence of accumulated Δweight__ 23 | * @param weight the __sequence of current weights__ 24 | */ 25 | override def apply(delta: IndexedSeq[ScalarMatrix], weight: IndexedSeq[ScalarMatrix]): Unit 26 | 27 | /** 28 | * Compute weight-loss of given weight parameters 29 | * 30 | * @param seq the __sequence__ of weight matrices 31 | * @return the total weight loss of this sequence 32 | */ 33 | def loss(seq: Seq[ScalarMatrix]) = { 34 | var i = 0 35 | var err = 0.0f 36 | while (i < seq.size) { 37 | val obj = seq(i) 38 | val l1loss = sum(abs(obj)) * l1decay 39 | val l2loss = sum(pow(obj, 2)) * l2decay 40 | err += l1loss + l2loss 41 | i += 1 42 | } 43 | err 44 | } 45 | } 46 | 47 | /** 48 | * __Algorithm__: AdaDelta algorithm 49 | * 50 | * If you are trying to use this algorithm for your research, you should add a reference to [[http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf AdaDelta techinical report]]. 51 | * 52 | * @param l1decay L,,1,, regularization factor `(Default 0.0)` 53 | * @param l2decay L,,2,, regularization factor `(Default 0.0001)` 54 | * @param decay AdaDelta history decay factor `(Default 95% = 0.95)` 55 | * @param epsilon AdaDelta base factor `(Default 1e-6)` 56 | * 57 | * @example {{{val algorithm = new AdaDelta(l2decay = 0.0001)}}} 58 | */ 59 | class AdaDelta(protected override val l1decay: Scalar = 0.0000f, 60 | protected override val l2decay: Scalar = 0.0001f, 61 | private val decay: Scalar = 0.95f, 62 | private val epsilon: Scalar = 1e-6f) 63 | extends WeightUpdater { 64 | /** accumulated history of gradients */ 65 | private val gradSq = ArrayBuffer[ScalarMatrix]() 66 | /** accumulated history of parameter updates */ 67 | private val deltaSq = ArrayBuffer[ScalarMatrix]() 68 | 69 | /** 70 | * Execute the algorithm for given __sequence of Δ(weight)__ and sequence of __weights__ 71 | * 72 | * @param delta the __sequence of accumulated Δ(weight)__ 73 | * @param weight the __sequence of current weights__ 74 | */ 75 | override def apply(delta: IndexedSeq[ScalarMatrix], weight: IndexedSeq[ScalarMatrix]): Unit = { 76 | if (gradSq.isEmpty) { 77 | gradSq.sizeHint(delta) 78 | deltaSq.sizeHint(delta) 79 | 80 | var i = 0 81 | while (i < delta.length) { 82 | val matx = delta(i) 83 | i += 1 84 | 85 | gradSq += ScalarMatrix.$0(matx.rows, matx.cols) 86 | deltaSq += ScalarMatrix.$0(matx.rows, matx.cols) 87 | } 88 | } 89 | 90 | delta.indices.par.foreach { 91 | id ⇒ 92 | val w = weight(id) 93 | val deltaW = delta(id) 94 | val gSq = gradSq(id) 95 | val dSq = deltaSq(id) 96 | 97 | val l1: ScalarMatrix = signum(w) * l1decay 98 | val l2: ScalarMatrix = (l2decay * 2) * w 99 | val d = l1 + l2 + deltaW 100 | 101 | gSq *= decay 102 | gSq += (1.0f - decay) * (d :* d) 103 | 104 | val r1: ScalarMatrix = sqrt(dSq + epsilon) 105 | val r2: ScalarMatrix = sqrt(gSq + epsilon) 106 | val rate: ScalarMatrix = r1 / r2 107 | 108 | val dw = d :* rate 109 | w -= dw 110 | 111 | dSq *= decay 112 | dSq += (1.0f - decay) * (dw :* dw) 113 | } 114 | } 115 | } 116 | 117 | 118 | /** 119 | * __Algorithm__: AdaGrad algorithm. 
120 | * 121 | * If you are trying to use this algorithm for your research, you should add a reference to [[http://www.magicbroom.info/Papers/DuchiHaSi10.pdf AdaGrad paper]]. 122 | * 123 | * @param rate the learning rate `(Default 0.6)` 124 | * @param l1decay L,,1,, regularization factor `(Default 0.0)` 125 | * @param l2decay L,,2,, regularization factor `(Default 0.0001)` 126 | * 127 | * 128 | * @example {{{val algorithm = new AdaGrad(l2decay = 0.0001)}}} 129 | */ 130 | class AdaGrad(rate: Scalar = 0.6f, 131 | protected override val l1decay: Scalar = 0.0000f, 132 | protected override val l2decay: Scalar = 0.0001f) 133 | extends WeightUpdater { 134 | /** accumulated history of parameter updates */ 135 | private val history = ArrayBuffer[ScalarMatrix]() 136 | 137 | /** 138 | * Execute the algorithm for given __sequence of Δweight__ and sequence of __weights__ 139 | * 140 | * @param delta the __sequence of accumulated Δweight__ 141 | * @param weight the __sequence of current weights__ 142 | */ 143 | override def apply(delta: IndexedSeq[ScalarMatrix], weight: IndexedSeq[ScalarMatrix]): Unit = { 144 | if (history.isEmpty) { 145 | history.sizeHint(delta) 146 | 147 | var i = 0 148 | while (i < delta.size) { 149 | val matx = delta(i) 150 | i += 1 151 | history += ScalarMatrix.$0(matx.rows, matx.cols) 152 | } 153 | } 154 | 155 | delta.indices.par.foreach { 156 | id ⇒ 157 | val w = weight(id) 158 | val deltaW = delta(id) 159 | val hW = history(id) 160 | 161 | val l1: ScalarMatrix = signum(w) * l1decay 162 | val l2: ScalarMatrix = (l2decay * 2) * w 163 | val d = l1 + l2 + deltaW 164 | 165 | hW += (d :* d) 166 | 167 | val arate: ScalarMatrix = rate / sqrt(hW) 168 | val dw: ScalarMatrix = d :* arate 169 | w -= dw 170 | } 171 | } 172 | } 173 | 174 | /** 175 | * __Algorithm__: Stochastic Gradient Descent 176 | * 177 | * Basic Gradient Descent rule with mini-batch training. 
178 | * 179 | * @param rate the learning rate `(Default 0.03)` 180 | * @param l1decay L,,1,, regularization factor `(Default 0.0)` 181 | * @param l2decay L,,2,, regularization factor `(Default 0.0001)` 182 | * @param momentum Momentum factor for adaptive learning `(Default 0.0001)` 183 | * 184 | * @example {{{val algorithm = new StochasticGradientDescent(l2decay = 0.0001)}}} 185 | */ 186 | class StochasticGradientDescent(rate: Scalar = 0.03f, 187 | protected override val l1decay: Scalar = 0.0000f, 188 | protected override val l2decay: Scalar = 0.0001f, 189 | momentum: Scalar = 0.0001f) 190 | extends WeightUpdater { 191 | /** the last update of parameters */ 192 | private val lastDelta = ArrayBuffer[ScalarMatrix]() 193 | 194 | /** 195 | * Execute the algorithm for given __sequence of Δweight__ and sequence of __weights__ 196 | * 197 | * @param delta the __sequence of accumulated Δweight__ 198 | * @param weight the __sequence of current weights__ 199 | */ 200 | override def apply(delta: IndexedSeq[ScalarMatrix], weight: IndexedSeq[ScalarMatrix]): Unit = { 201 | if (momentum > 0 && lastDelta.isEmpty) { 202 | lastDelta.sizeHint(delta) 203 | var i = 0 204 | while (i < delta.size) { 205 | val matx = delta(i) 206 | lastDelta += ScalarMatrix.$0(matx.rows, matx.cols) 207 | i += 1 208 | } 209 | } 210 | 211 | delta.indices.par.foreach { 212 | id ⇒ 213 | val w = weight(id) 214 | val deltaW = delta(id) 215 | val hW = if (momentum > 0) lastDelta(id) else null 216 | 217 | val l1: ScalarMatrix = signum(w) * l1decay 218 | val l2: ScalarMatrix = (l2decay * 2) * w 219 | val d = l1 + l2 + deltaW 220 | if (hW != null) { 221 | hW *= momentum 222 | hW -= d 223 | w -= hW 224 | } else { 225 | w -= d 226 | } 227 | } 228 | } 229 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/fn/package.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep 2 | 3 | import breeze.linalg.DenseMatrix 4 | import play.api.libs.json.{JsArray, JsObject, JsString, Json} 5 | 6 | /** 7 | * Package for various functions. 8 | */ 9 | package object fn { 10 | /** Type of scalar **/ 11 | type Scalar = Float 12 | /** Type of probability **/ 13 | type Probability = Float 14 | /** Type of Neuron Input **/ 15 | type ScalarMatrix = DenseMatrix[Scalar] 16 | /** Define Alias **/ 17 | val Tanh = HyperbolicTangent 18 | 19 | /** 20 | * Defines sugar operations for ScalarMatrix 21 | * 22 | * @param x the __matrix__ to be computed 23 | */ 24 | implicit class ScalarMatrixOp(x: ScalarMatrix) { 25 | /** 26 | * Append __given scalar__ to last row. 27 | * 28 | * @param y a __scalar__ to be added 29 | */ 30 | def row_+(y: Scalar): ScalarMatrix = { 31 | val scalar: ScalarMatrix = (ScalarMatrix $1(1, x.cols)) :* y 32 | x row_+ scalar 33 | } 34 | 35 | /** 36 | * Append __given matrix__ to last rows. 37 | * 38 | * @param y a __matrix__ to be added 39 | */ 40 | def row_+(y: ScalarMatrix): ScalarMatrix = { 41 | DenseMatrix.vertcat(x, y) 42 | } 43 | 44 | /** 45 | * Append __given matrix__ to last columns. 
46 | * 47 | * @param y a __matrix__ to be added 48 | */ 49 | def col_+(y: ScalarMatrix) = { 50 | DenseMatrix.horzcat(x, y) 51 | } 52 | 53 | /** 54 | * Make given matrix as 2D JSON Array 55 | * 56 | * @return JsArray of this matrix 57 | */ 58 | def to2DSeq: JsArray = { 59 | val r = x.rows 60 | val c = x.cols 61 | JsArray((0 until r) map { 62 | i ⇒ JsArray((0 until c) map { 63 | j ⇒ JsString(f"${x(i, j)}%.8f") 64 | }) 65 | }) 66 | } 67 | 68 | /** 69 | * String representation of matrix 70 | * 71 | * @return string representation 72 | */ 73 | def mkString: String = 74 | "{" + (((0 until x.rows) map { 75 | r ⇒ "[" + (((0 until x.cols) map { c ⇒ f"${x(r, c)}%.3f"}) mkString ", ") + "]" 76 | }) mkString ", ") + "}" 77 | } 78 | 79 | /** 80 | * Defines sugar operations of probability 81 | * 82 | * @param x __scalar__ to be applied 83 | */ 84 | implicit class ProbabilityOp(x: Probability) { 85 | /** 86 | * Returns safe probability 87 | * 88 | * @return probability between 0 and 1 89 | */ 90 | def safe = if (0.0 <= x && x <= 1.0) x else if (x < 0.0) 0.0f else 1.0f 91 | } 92 | 93 | /** 94 | * Defines sugar operations of sequence of weights 95 | * 96 | * @param w __matrix sequence__ to be applied. 97 | */ 98 | implicit class WeightSeqOp(w: IndexedSeq[ScalarMatrix]) { 99 | /** 100 | * Assign scalar 101 | * 102 | * @param x __scalar__ to be assigned for every cell 103 | */ 104 | @deprecated 105 | def :=(x: Scalar) = w.par.map(_ := 0f) 106 | 107 | /** 108 | * Assign matrices 109 | * @param w2 to be assigned 110 | */ 111 | def :=(w2: IndexedSeq[ScalarMatrix]) = { 112 | var i = w.size - 1 113 | while (i >= 0) { 114 | w(i) := w2(i) 115 | i -= 1 116 | } 117 | } 118 | 119 | /** 120 | * Copy these matrices 121 | * 122 | * @return copied matrices 123 | */ 124 | def copy = w.par.map(_.copy).toIndexedSeq 125 | 126 | /** 127 | * Add another matrices in-place. 128 | * 129 | * @param w2 __matrix sequence__ to be added 130 | */ 131 | def :+=(w2: IndexedSeq[ScalarMatrix]) = { 132 | (0 until w.size).par.map(i ⇒ w(i) :+= w2(i)) 133 | w 134 | } 135 | 136 | /** 137 | * Divide matrices with given scalar 138 | * 139 | * @param x __scalar__ as a divider. 140 | */ 141 | def :/=(x: Scalar) = { 142 | w.par.map(_ :/= x) 143 | } 144 | } 145 | 146 | /** 147 | * Defines transformation of new activation function. 148 | * 149 | * @param act Activation function to be transformed. 150 | */ 151 | implicit class ActivationOp(act: Activation) extends Serializable { 152 | /** 153 | * Scale Activation function = `sY * f(x * sX)` 154 | * @param sX scale factor along input 155 | * @param sY scale factor along output 156 | * @return new Activation Function 157 | */ 158 | def *(sX: Float = 1f, sY: Float = 1f) = new Activation { 159 | /** 160 | * Compute differentiation value of this function at `f(x) = fx` 161 | * 162 | * @param fx the __output__ of this function 163 | * @return differentiation value at `f(x) = fx`, which should be an __square, diagonal matrix__ 164 | */ 165 | override def derivative(fx: ScalarMatrix): ScalarMatrix = act.derivative(fx :* sX) :* (sY * sX) 166 | 167 | /** 168 | * Compute mapping for `x` 169 | * 170 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 171 | * @return value of `f(x)` 172 | */ 173 | override def apply(x: ScalarMatrix): ScalarMatrix = act(x :* sX) :* sY 174 | 175 | /** 176 | * Serialize Activation function into String. 177 | * @note If this is an "object", do not modify this function. 
178 | * This does not supports Activation Operations defined outside of this package. 179 | * @return JSON object states this function 180 | */ 181 | override def toJSON: JsObject = Json.obj( 182 | "function" → "scale", 183 | "base" → act.toJSON, 184 | "X" → sX, 185 | "Y" → sY 186 | ) 187 | } 188 | 189 | /** 190 | * Translate Activation function = `dY + f(x - dX)` 191 | * @param dX shift amount along input 192 | * @param dY shift amount along output 193 | * @return new Activation Function 194 | */ 195 | def +(dX: Float = 0f, dY: Float = 0f) = new Activation { 196 | /** 197 | * Compute differentiation value of this function at `f(x) = fx` 198 | * 199 | * @param fx the __output__ of this function 200 | * @return differentiation value at `f(x) = fx`, which should be an __square, diagonal matrix__ 201 | */ 202 | override def derivative(fx: ScalarMatrix): ScalarMatrix = act.derivative(fx :- dX) 203 | 204 | /** 205 | * Compute mapping for `x` 206 | * 207 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 208 | * @return value of `f(x)` 209 | */ 210 | override def apply(x: ScalarMatrix): ScalarMatrix = act(x :- dX) :+ dY 211 | 212 | /** 213 | * Serialize Activation function into String. 214 | * @note If this is an "object", do not modify this function. 215 | * This does not supports Activation Operations defined outside of this package. 216 | * @return JSON object states this function 217 | */ 218 | override def toJSON: JsObject = Json.obj( 219 | "function" → "translate", 220 | "base" → act.toJSON, 221 | "X" → dX, 222 | "Y" → dY 223 | ) 224 | } 225 | 226 | /** 227 | * Add activation functions = `f1(x) + f2(x) ..` 228 | * @param others Activation functions to be added. (Variable Parameter) 229 | * @return new Activation Function 230 | */ 231 | def +(others: Activation*) = new Activation { 232 | private val applySeq = others :+ act 233 | 234 | /** 235 | * Compute differentiation value of this function at `f(x) = fx` 236 | * 237 | * @param fx the __output__ of this function 238 | * @return differentiation value at `f(x) = fx`, which should be an __square, diagonal matrix__ 239 | */ 240 | override def derivative(fx: ScalarMatrix): ScalarMatrix = 241 | applySeq.map(_.derivative(fx)).reduce(_ :+ _) 242 | 243 | /** 244 | * Compute mapping for `x` 245 | * 246 | * @param x the __input__ matrix. ''Before application, input should be summed already.'' 247 | * @return value of `f(x)` 248 | */ 249 | override def apply(x: ScalarMatrix): ScalarMatrix = 250 | applySeq.map(_.apply(x)).reduce(_ :+ _) 251 | 252 | /** 253 | * Serialize Activation function into String. 254 | * @note If this is an "object", do not modify this function. 255 | * This does not supports Activation Operations defined outside of this package. 
256 | * @return JSON object states this function 257 | */ 258 | override def toJSON: JsObject = Json.obj( 259 | "function" → "add", 260 | "base" → act.toJSON, 261 | "args" → others.map(_.toJSON) 262 | ) 263 | } 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/BasicLayer.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import play.api.libs.json.{JsObject, Json} 5 | 6 | /** 7 | * __Layer__: Basic, Fully-connected Layer 8 | * 9 | * @param IO a pair of __input & output__, such as 2 -> 3 10 | * @param act an __activation function__ to be applied 11 | * @param w initial weight matrix for the case that it is restored from JSON `(default: null)` 12 | * @param b inital bias matrix for the case that it is restored from JSON `(default: null)` 13 | */ 14 | class BasicLayer(IO: (Int, Int), 15 | protected override val act: Activation, 16 | w: ScalarMatrix = null, 17 | b: ScalarMatrix = null) 18 | extends Layer { 19 | require(act != null, "Activation function must not be null.") 20 | require(IO._1 > 0, "Input dimension must be greater than 0") 21 | require(IO._2 > 0, "Output dimension must be greater than 0") 22 | 23 | /** Number of Fan-ins */ 24 | protected final val fanIn = IO._1 25 | /** Number of output */ 26 | protected final val fanOut = IO._2 27 | /* Initialize weight */ 28 | protected final val weight = if (w != null) w else act.initialize(fanIn, fanOut) 29 | protected final val bias = if (b != null) b else act.initialize(fanIn, fanOut, fanOut, 1) 30 | /** weights for update */ 31 | override val W: IndexedSeq[ScalarMatrix] = IndexedSeq(weight, bias) 32 | 33 | /** 34 | * Forward computation 35 | * 36 | * @param x input matrix 37 | * @return output matrix 38 | */ 39 | override def apply(x: ScalarMatrix): ScalarMatrix = { 40 | val wx: ScalarMatrix = weight * x 41 | val wxb: ScalarMatrix = wx + bias 42 | act(wxb) 43 | } 44 | 45 | /** 46 | * Translate this layer into JSON object (in Play! framework) 47 | * 48 | * @return JSON object describes this layer 49 | */ 50 | override def toJSON: JsObject = Json.obj( 51 | "type" → "BasicLayer", 52 | "in" → fanIn, 53 | "out" → fanOut, 54 | "act" → act.toJSON, 55 | "weight" → weight.to2DSeq, 56 | "bias" → bias.to2DSeq 57 | ) 58 | 59 | /** 60 | *

Backward computation.

61 | * 62 | * @note

63 | * Let this layer compute F(X(x)), where X(x) = W.x + b, 64 | * and let the higher layers compute G. 65 | *

66 | * 67 | *

68 | * The weight is updated with dG/dW, 69 | * and dG/dx is propagated to the lower layer. 70 | *

71 | * 72 | *

73 | * For the computation, denominator-layout notation is used throughout (cf. the Wikipedia page on matrix calculus). 74 | * For the derivation rules, see "The Matrix Cookbook". 75 | *
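 *       For example, with fanIn = 3 and fanOut = 2: dG/dF and dG/dX are 2 × 1 column vectors, dG/dW = dG/dX * X.t is a 2 × 3 matrix (the shape of the weight), and the propagated error dG/dx = W.t * dG/dX is 3 × 1 (the shape of the input).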

76 | * 77 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 78 | * In this function, (bias :: weight) ::: lowerStack 79 | * Thus dWeight is app 80 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 81 | * @return propagated error (in this case, dG/dx ) 82 | */ 83 | def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = { 84 | /* 85 | * Chain Rule : dG/dX_ij = tr[ ( dG/dF ).t * dF/dX_ij ]. 86 | * 87 | * Note 1. X, dG/dF, dF/dX_ij are row vectors. Therefore tr(.) can be omitted. 88 | * 89 | * Thus, dG/dX = [ (dG/dF).t * dF/dX ].t, because [...] is 1 × fanOut matrix. 90 | * Therefore dG/dX = dF/dX * dG/dF, because dF/dX is symmetric in our case. 91 | */ 92 | val dGdX: ScalarMatrix = dFdX * error 93 | 94 | // For bias, input is always 1. We only need dG/dX 95 | delta.next += dGdX 96 | 97 | /* 98 | * Chain Rule : dG/dW_ij = tr[ ( dG/dX ).t * dX/dW_ij ]. 99 | * 100 | * dX/dW_ij is a fan-Out dimension column vector with all zero but (i, 1) = X_j. 101 | * Thus, tr(.) can be omitted, and dG/dW_ij = (dX/dW_ij).t * dG/dX 102 | * Then {j-th column of dG/dW} = X_j * dG/dX = dG/dX * X_j. 103 | * 104 | * Therefore dG/dW = dG/dX * X.t 105 | */ 106 | val dGdW: ScalarMatrix = dGdX * X.t 107 | delta.next += dGdW 108 | 109 | /* 110 | * Chain Rule : dG/dx_ij = tr[ ( dG/dX ).t * dX/dx_ij ]. 111 | * 112 | * X is column vector. Thus j is always 1, so dX/dx_i is a W_?i. 113 | * Hence dG/dx_i = tr[ (dG/dX).t * dX/dx_ij ] = (W_?i).t * dG/dX. 114 | * 115 | * Thus dG/dx = W.t * dG/dX 116 | */ 117 | val dGdx: ScalarMatrix = weight.t * dGdX 118 | dGdx 119 | } 120 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/Dropout.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import play.api.libs.json.{JsObject, Json} 5 | 6 | /** 7 | * __Layer__ that drop-outs its input. 8 | * 9 | * This layer has a function of "pipeline" with drop-out possibility. 10 | * Because dropping out neurons occurr in the hidden layer, we need some intermediate pipe that handle this feature. 11 | * This layer only conveys its input to its output synapse if that output is alive. 12 | */ 13 | trait Dropout extends Layer { 14 | /* On-off matrix */ 15 | protected var onoff: ScalarMatrix = null 16 | /** The probability of the neuron is alive. `(Default: 1.0, 100%)` */ 17 | private var presence: Probability = 1.0f 18 | 19 | /** 20 | * Set presence probability 21 | * @param p Probability to be set 22 | * @return Layer extended with dropout operta 23 | */ 24 | def withProbability(p: Probability) = { 25 | presence = p 26 | this 27 | } 28 | 29 | /** 30 | * Forward computation 31 | * 32 | * @param x input matrix 33 | * @return output matrix 34 | */ 35 | abstract override def apply(x: ScalarMatrix): ScalarMatrix = 36 | if (presence >= 1.0) super.apply(x) 37 | else super.apply(x) :* presence.safe 38 | 39 | /** 40 | * Translate this layer into JSON object (in Play! framework) 41 | * 42 | * @return JSON object describes this layer 43 | */ 44 | abstract override def toJSON: JsObject = super.toJSON ++ Json.obj("Dropout" → presence) 45 | 46 | /** 47 | * Sugar: Forward computation. 
Calls apply(x) 48 | * 49 | * @param x input matrix 50 | * @return output matrix 51 | */ 52 | abstract override def passedBy(x: ScalarMatrix): ScalarMatrix = 53 | if (presence >= 1.0) super.passedBy(x) 54 | else { 55 | onoff = ScalarMatrix $01(x.rows, x.cols, presence.safe) 56 | super.passedBy(x) :* onoff 57 | } 58 | 59 | /** 60 | *

Backward computation.
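 * In the backward pass the same 0/1 mask onoff that was sampled in passedBy re-weights the incoming error (for example, with presence = 0.5f roughly half of the error entries are zeroed), so dropped neurons receive exactly zero gradient.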

61 | * 62 | * @note Because this layer only mediates two layers, this layer just remove propagated error for unused elements. 63 | * 64 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 65 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 66 | * @return propagated error (in this case, dG/dx ) 67 | */ 68 | abstract override def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = 69 | if (presence >= 1) super.updateBy(delta, error) 70 | else super.updateBy(delta, error :* onoff) 71 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/DropoutOperation.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import play.api.libs.json.{JsObject, Json} 5 | 6 | /** 7 | * __Layer__ that drop-outs its input. 8 | * 9 | * This layer has a function of "pipeline" with drop-out possibility. 10 | * Because dropping out neurons occurr in the hidden layer, we need some intermediate pipe that handle this feature. 11 | * This layer only conveys its input to its output synapse if that output is alive. 12 | * 13 | * @note Please extend [[Dropout]] trait to target layer. 14 | * 15 | * @param presence The probability of the neuron is alive. `(Default: 1.0, 100%)` 16 | */ 17 | @deprecated 18 | class DropoutOperation(protected val presence: Probability = 1.0f) extends Layer { 19 | /** 20 | * weights for update 21 | * 22 | * @return weights 23 | */ 24 | override val W: IndexedSeq[ScalarMatrix] = IndexedSeq.empty 25 | /** Null activation */ 26 | protected override val act = null 27 | /* On-off matrix */ 28 | protected var onoff: ScalarMatrix = null 29 | 30 | /** 31 | * Forward computation 32 | * 33 | * @param x input matrix 34 | * @return output matrix 35 | */ 36 | override def apply(x: ScalarMatrix): ScalarMatrix = 37 | if (presence >= 1.0) x 38 | else x :* presence.safe 39 | 40 | /** 41 | * Translate this layer into JSON object (in Play! framework) 42 | * 43 | * @return JSON object describes this layer 44 | */ 45 | override def toJSON: JsObject = Json.obj( 46 | "type" → "DropoutOp", 47 | "presence" → presence.safe 48 | ) 49 | 50 | /** 51 | * Sugar: Forward computation. Calls apply(x) 52 | * 53 | * @param x input matrix 54 | * @return output matrix 55 | */ 56 | override def into_:(x: ScalarMatrix): ScalarMatrix = 57 | if (presence >= 1.0) x 58 | else { 59 | onoff = ScalarMatrix $01(x.rows, x.cols, presence.safe) 60 | x :* onoff 61 | } 62 | 63 | /** 64 | *

Backward computation.

65 | * 66 | * @note Because this layer only mediates two layers, this layer just remove propagated error for unused elements. 67 | * 68 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 69 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 70 | * @return propagated error (in this case, dG/dx ) 71 | */ 72 | def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = 73 | if (presence >= 1) error 74 | else error :* onoff 75 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/FullTensorLayer.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn.{Activation, ScalarMatrix, ScalarMatrixOp} 4 | import play.api.libs.json.{JsArray, JsObject, Json} 5 | 6 | /** 7 | * __Layer__: Basic, Fully-connected Rank 3 Tensor Layer. 8 | * 9 | * @note
10 |  *       v0 = the input column vector.
11 |  *       Q = a rank-3 tensor with out slices; each slice is an in × in matrix.
12 |  *       L = a rank-3 tensor with out slices; each slice is a 1 × in row vector (so L acts as an out × in matrix).
13 |  *       b = out × 1 matrix.
14 |  *
15 |  *       output = f( v0'.Q.v0 + L.v0 + b )
16 |  *       
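 *       For example, with IO = (3 → 2): Q is a sequence of two 3 × 3 matrices, L is a 2 × 3 matrix, b is 2 × 1, and the k-th entry of the pre-activation is v0'.Q_k.v0 + (L.v0)_k + b_k before f is applied.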
17 | * 18 | * @param IO is a tuple of the number of input and output, i.e. (2 → 4) 19 | * @param act is an activation function to be applied 20 | * @param quad is initial quadratic-level weight matrix Q for the case that it is restored from JSON (default: Seq()) 21 | * @param lin is initial linear-level weight matrix L for the case that it is restored from JSON (default: null) 22 | * @param const is initial bias weight matrix b for the case that it is restored from JSON (default: null) 23 | */ 24 | class FullTensorLayer(IO: (Int, Int), 25 | protected override val act: Activation, 26 | quad: Seq[ScalarMatrix] = Seq(), 27 | lin: ScalarMatrix = null, 28 | const: ScalarMatrix = null) 29 | extends Rank3TensorLayer((IO._1, IO._1, IO._1), IO._2, act, quad, lin, const) { 30 | 31 | /** 32 | * Translate this layer into JSON object (in Play! framework) 33 | * 34 | * @return JSON object describes this layer 35 | */ 36 | override def toJSON: JsObject = Json.obj( 37 | "type" → "FullTensorLayer", 38 | "in" → fanIn, 39 | "out" → fanOut, 40 | "act" → act.toJSON, 41 | "quadratic" → JsArray.apply(quadratic.map(_.to2DSeq)), 42 | "linear" → linear.to2DSeq, 43 | "bias" → bias.to2DSeq 44 | ) 45 | 46 | /** 47 | * Retrieve first input 48 | * 49 | * @param x input to be separated 50 | * @return first input 51 | */ 52 | protected override def in1(x: ScalarMatrix): ScalarMatrix = x 53 | 54 | /** 55 | * Retrive second input 56 | * 57 | * @param x input to be separated 58 | * @return second input 59 | */ 60 | protected override def in2(x: ScalarMatrix): ScalarMatrix = x 61 | 62 | /** 63 | * Reconstruct error from fragments 64 | * @param in1 error of input1 65 | * @param in2 error of input2 66 | * @return restored error 67 | */ 68 | override protected def restoreError(in1: ScalarMatrix, in2: ScalarMatrix): ScalarMatrix = in1 + in2 69 | } 70 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/GaussianRBFLayer.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import breeze.linalg.sum 4 | import breeze.numerics.{exp, pow} 5 | import kr.ac.kaist.ir.deep.fn._ 6 | import play.api.libs.json.{JsObject, Json} 7 | 8 | import scala.annotation.tailrec 9 | 10 | /** 11 | * __Layer__ : An Radial Basis Function Layer, with Gaussian function as its radial basis. 12 | * 13 | * @param in Dimension of input 14 | * @param centers A Matrix of Centroids. Each column is a column vector for centroids. 15 | * @param canModifyCenter True if update center during training. 16 | * @param w Initial weight (default: null) 17 | */ 18 | class GaussianRBFLayer(val in: Int, 19 | val centers: ScalarMatrix, 20 | val canModifyCenter: Boolean = true, 21 | w: ScalarMatrix = null) extends Layer { 22 | protected final val weight = if (w != null) w else ScalarMatrix of(centers.cols, 1) 23 | protected final val sumCentroidEff = ScalarMatrix $1(centers.cols, 1) 24 | protected final val sumByRow = ScalarMatrix $1(1, in) 25 | override protected val act: Activation = null 26 | override val W: IndexedSeq[ScalarMatrix] = IndexedSeq(centers, weight) 27 | 28 | /** 29 | * Translate this layer into JSON object (in Play! framework) 30 | * @note Please make an LayerReviver object if you're using custom layer. 31 | * In that case, please specify LayerReviver object's full class name as "__reviver__," 32 | * and fill up LayerReviver.revive method. 
33 | * 34 | * @return JSON object describes this layer 35 | */ 36 | override def toJSON: JsObject = Json.obj( 37 | "type" → "GaussianRBF", 38 | "in" → in, 39 | "center" → centers.to2DSeq, 40 | "canModifyCenter" → canModifyCenter, 41 | "weight" → weight.to2DSeq 42 | ) 43 | 44 | /** 45 | * Forward computation 46 | * 47 | * @param x input matrix 48 | * @return output matrix 49 | */ 50 | override def apply(x: ScalarMatrix): ScalarMatrix = { 51 | val sqWeight: ScalarMatrix = pow(weight, 2f) :* 2f 52 | exp(applyCoord(x, sqWeight, ScalarMatrix $0(centers.cols, 1), centers.cols - 1)) 53 | } 54 | 55 | /** 56 | *

Backward computation.

57 | * 58 | * @note

59 | * Let N_i be the (unnormalized) Gaussian kernel centered at c_i with width s_i. 60 | * Then the output of this layer is y_i = N_i(x) = exp(-|x - c_i|^2 / (2 * s_i^2)). 61 | * Let G denote the function computed by the higher layers. 62 | *

63 | * 64 | *

65 | * Centers are updated with dG/dC_ij = dG/dN_i * dN_i/dc_ij, 66 | * widths (the layer's weights) are updated with dG/dW_i = dG/dN_i * dN_i/dw_i, 67 | * and dG/dx_j = \sum_i dG/dN_i * dN_i/dx_ij is propagated to the lower layer. 68 | *
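 *       Concretely, dN_i/dc_i = (x - c_i) / s_i^2 * N_i and dN_i/ds_i = |x - c_i|^2 / s_i^3 * N_i, as worked out in the in-code comments of updateCoord below.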

69 | * 70 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 71 | * In this case, (weight :: centers) :: lowerStack 72 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 73 | * @return propagated error (in this case, dG/dx ) 74 | */ 75 | def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = { 76 | val multiplier: ScalarMatrix = (error :* dFdX) :/ pow(weight, 2f) 77 | 78 | val dGdC = ScalarMatrix.$0(centers.rows, centers.cols) 79 | val dWeight = updateCoord(multiplier, dGdC, centers.cols - 1) 80 | 81 | // Update Weight 82 | delta.next += ScalarMatrix(dWeight: _*) 83 | 84 | if (canModifyCenter) 85 | delta.next += dGdC 86 | else 87 | delta.next() 88 | 89 | -dGdC * sumCentroidEff 90 | } 91 | 92 | @tailrec 93 | private def applyCoord(x: ScalarMatrix, sqWeight: ScalarMatrix, out: ScalarMatrix, i: Int): ScalarMatrix = 94 | if (i >= 0) { 95 | val d: Scalar = sum(pow(x - centers(::, i to i), 2f)) 96 | val in = -d / sqWeight(i, 0) 97 | 98 | out(i, 0) = in 99 | applyCoord(x, sqWeight, out, i - 1) 100 | } else 101 | out 102 | 103 | @tailrec 104 | private def updateCoord(multiplier: ScalarMatrix, dGdC: ScalarMatrix, 105 | i: Int, dWeight: Seq[Scalar] = Seq.empty): Seq[Scalar] = 106 | if (i >= 0) { 107 | val d: ScalarMatrix = X - centers(::, i to i) 108 | 109 | val w = weight(i, 0) 110 | val m = multiplier(i, 0) 111 | 112 | /* Compute dNi/dCij. 113 | * Since Ni = exp(-|x-ci|^2/(2si^2)), dNi/dCij = (xj-cij)/si^2 * Ni. 114 | * Therefore dNi/dCi = (x-ci)/si^2 * Ni. 115 | * dG/dCi = dG/dNi * dNi/dCi. 116 | * Note that dNi/dX = -dNi/dCi, and dG/dX = - \sum (dG/dNi * dNi/dCi) 117 | */ 118 | dGdC(::, i to i) := d * m 119 | 120 | /* Compute dG/dSi. 121 | * dNi/dSi = |x-ci|^2/si^3 * Ni. 122 | * dG/dSi = dG/dNi * dNi/dSi. 123 | */ 124 | val wUpdate = sum(pow(d, 2f)) * (m / w) 125 | // This update entry is the topmost row entry. 
126 | updateCoord(multiplier, dGdC, i - 1, wUpdate +: dWeight) 127 | } else 128 | dWeight 129 | 130 | } 131 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/LowerTriangularLayer.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import play.api.libs.json.{JsObject, Json} 5 | 6 | /** 7 | * __Layer__: Basic, Fully-connected Layer 8 | * 9 | * @param IO a pair of __input & output__, such as 2 -> 3 10 | * @param act an __activation function__ to be applied 11 | * @param w initial weight matrix for the case that it is restored from JSON `(default: null)` 12 | * @param b inital bias matrix for the case that it is restored from JSON `(default: null)` 13 | */ 14 | class LowerTriangularLayer(IO: (Int, Int), 15 | protected override val act: Activation, 16 | w: ScalarMatrix = null, 17 | b: ScalarMatrix = null) 18 | extends Layer { 19 | /** Number of Fan-ins */ 20 | protected final val fanIn = IO._1 21 | /** Number of output */ 22 | protected final val fanOut = IO._2 23 | /* Initialize weight */ 24 | protected final val weight = 25 | if (w != null) w 26 | else 27 | act.initialize(fanIn, fanOut).mapActivePairs { 28 | case ((r, c), x) ⇒ if (c > r) 0f else x 29 | } 30 | protected final val bias = if (b != null) b else act.initialize(fanIn, fanOut, fanOut, 1) 31 | /** weights for update */ 32 | override val W: IndexedSeq[ScalarMatrix] = IndexedSeq(weight, bias) 33 | 34 | /** 35 | * Forward computation 36 | * 37 | * @param x input matrix 38 | * @return output matrix 39 | */ 40 | override def apply(x: ScalarMatrix): ScalarMatrix = { 41 | val wx: ScalarMatrix = weight * x 42 | val wxb: ScalarMatrix = wx + bias 43 | act(wxb) 44 | } 45 | 46 | /** 47 | * Translate this layer into JSON object (in Play! framework) 48 | * 49 | * @return JSON object describes this layer 50 | */ 51 | override def toJSON: JsObject = Json.obj( 52 | "type" → "LowerTriangularLayer", 53 | "in" → fanIn, 54 | "out" → fanOut, 55 | "act" → act.toJSON, 56 | "weight" → weight.to2DSeq, 57 | "bias" → bias.to2DSeq 58 | ) 59 | 60 | /** 61 | *

Backward computation.

62 | * 63 | * @note

64 | * Let this layer compute F(X(x)), where X(x) = W.x + b, 65 | * and let the higher layers compute G. 66 | *

67 | * 68 | *

69 | * The weight is updated with dG/dW, 70 | * and dG/dx is propagated to the lower layer. 71 | *

72 | * 73 | *

74 | * For the computation, denominator-layout notation is used throughout (cf. the Wikipedia page on matrix calculus). 75 | * For the derivation rules, see "The Matrix Cookbook". 76 | *
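 *       For example, a 3 × 3 weight keeps only its six entries on or below the diagonal; dG/dW is masked with the same rule, so the upper-triangular entries remain exactly zero throughout training.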

77 | * 78 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 79 | * In this case, bias :: weight ::: lowerStack 80 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 81 | * @return propagated error (in this case, dG/dx ) 82 | */ 83 | def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = { 84 | /* 85 | * Chain Rule : dG/dX_ij = tr[ ( dG/dF ).t * dF/dX_ij ]. 86 | * 87 | * Note 1. X, dG/dF, dF/dX_ij are row vectors. Therefore tr(.) can be omitted. 88 | * 89 | * Thus, dG/dX = [ (dG/dF).t * dF/dX ].t, because [...] is 1 × fanOut matrix. 90 | * Therefore dG/dX = dF/dX * dG/dF, because dF/dX is symmetric in our case. 91 | */ 92 | val dGdX: ScalarMatrix = dFdX * error 93 | 94 | // For bias, input is always 1. We only need dG/dX 95 | delta.next += dGdX 96 | 97 | /* 98 | * Chain Rule : dG/dW_ij = tr[ ( dG/dX ).t * dX/dW_ij ]. 99 | * 100 | * dX/dW_ij is a fan-Out dimension column vector with all zero but (i, 1) = X_j. 101 | * Thus, tr(.) can be omitted, and dG/dW_ij = (dX/dW_ij).t * dG/dX 102 | * Then {j-th column of dG/dW} = X_j * dG/dX = dG/dX * X_j. 103 | * 104 | * Therefore dG/dW = dG/dX * X.t 105 | * Except the upper triangular region. 106 | */ 107 | val dGdWp: ScalarMatrix = (dGdX * X.t) 108 | val dGdW = dGdWp.mapActivePairs { 109 | case ((r, c), x) ⇒ if (c > r) 0f else x 110 | } 111 | delta.next += dGdW 112 | 113 | /* 114 | * Chain Rule : dG/dx_ij = tr[ ( dG/dX ).t * dX/dx_ij ]. 115 | * 116 | * X is column vector. Thus j is always 1, so dX/dx_i is a W_?i. 117 | * Hence dG/dx_i = tr[ (dG/dX).t * dX/dx_ij ] = (W_?i).t * dG/dX. 118 | * 119 | * Thus dG/dx = W.t * dG/dX 120 | */ 121 | val dGdx: ScalarMatrix = weight.t * dGdX 122 | dGdx 123 | } 124 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/Normalize.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import breeze.linalg.sum 4 | import breeze.numerics.pow 5 | import kr.ac.kaist.ir.deep.fn._ 6 | import play.api.libs.json.{JsObject, Json} 7 | 8 | /** 9 | * __Layer__ that normalizes its input. 10 | */ 11 | trait Normalize extends Layer { 12 | /** 13 | * weights for update 14 | * 15 | * @return weights 16 | */ 17 | override val W: IndexedSeq[ScalarMatrix] = IndexedSeq.empty 18 | /** Null activation */ 19 | protected override val act = null 20 | 21 | /** 22 | * Forward computation 23 | * 24 | * @param x input matrix 25 | * @return output matrix 26 | */ 27 | abstract override def apply(x: ScalarMatrix): ScalarMatrix = { 28 | val raw = super.apply(x) 29 | val len = Math.sqrt(sum(pow(raw, 2.0f))).toFloat 30 | raw :/ len 31 | } 32 | 33 | /** 34 | * Translate this layer into JSON object (in Play! framework) 35 | * 36 | * @return JSON object describes this layer 37 | */ 38 | abstract override def toJSON: JsObject = super.toJSON ++ Json.obj("Normalize" → "") 39 | 40 | /** 41 | *

Backward computation.
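 * In closed form, writing z = x / len with len = |x|, the Jacobian applied below is dz/dx = (I - z.z') / len; the loop fills its entries into dZdX one by one before multiplying by the incoming error.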

42 | * 43 | * @note Because this layer only mediates two layers, this layer just remove propagated error for unused elements. 44 | * 45 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 46 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 47 | * @return propagated error (in this case, dG/dx ) 48 | */ 49 | abstract override def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = { 50 | val Xsq = pow(X, 2.0f) 51 | val lenSq = sum(Xsq) 52 | val len: Scalar = Math.sqrt(lenSq).toFloat 53 | 54 | // Note that length is the function of x_i. 55 | // Let z_i := x_i / len(x_i). 56 | // Then d z_i / d x_i = (len^2 - x_i^2) / len^3 = (1 - z_i^2) / len, 57 | // d z_j / d x_i = - x_i * x_j / len^3 = - z_i * z_j / len 58 | val rows = dFdX.rows 59 | val dZdX = ScalarMatrix $0(rows, rows) 60 | var r = 0 61 | while (r < rows) { 62 | //dZ_r 63 | var c = 0 64 | while (c < rows) { 65 | if (r == c) { 66 | //dX_c 67 | dZdX.update(r, c, (1.0f - Xsq(r, 0) / lenSq) / len) 68 | } else { 69 | dZdX.update(r, c, (-X(r, 0) * X(c, 0)) / (len * lenSq)) 70 | } 71 | c += 1 72 | } 73 | r += 1 74 | } 75 | 76 | // un-normalize the error 77 | super.updateBy(delta, dZdX * error) 78 | } 79 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/NormalizeOperation.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import breeze.linalg.sum 4 | import breeze.numerics.pow 5 | import kr.ac.kaist.ir.deep.fn._ 6 | import play.api.libs.json.{JsObject, Json} 7 | 8 | /** 9 | * __Layer__ that normalizes its input. 10 | * 11 | * @param factor The multiplication factor of the normalized output `(Default 1.0)` 12 | */ 13 | @deprecated 14 | class NormalizeOperation(protected val factor: Scalar = 1.0f) extends Layer { 15 | /** 16 | * weights for update 17 | * 18 | * @return weights 19 | */ 20 | override val W: IndexedSeq[ScalarMatrix] = IndexedSeq.empty 21 | /** Null activation */ 22 | protected override val act = null 23 | 24 | /** 25 | * Translate this layer into JSON object (in Play! framework) 26 | * 27 | * @return JSON object describes this layer 28 | */ 29 | override def toJSON: JsObject = Json.obj( 30 | "type" → "NormOp", 31 | "factor" → factor 32 | ) 33 | 34 | /** 35 | *

Backward computation.

36 | * 37 | * @note Because this layer only mediates two layers, this layer just remove propagated error for unused elements. 38 | * 39 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 40 | * In this case, centers :: weight :: REMAINDER-SEQ 41 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 42 | * @return propagated error (in this case, dG/dx ) and remainder of delta sequence 43 | */ 44 | def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = { 45 | val len: Scalar = Math.sqrt(sum(pow(X, 2.0f))).toFloat 46 | val output: ScalarMatrix = apply(X) 47 | 48 | // Note that length is the function of x_i. 49 | // Let z_i := x_i / len(x_i). 50 | // Then d z_i / d x_i = (len^2 - x_i^2) / len^3 = (1 - z_i^2) / len, 51 | // d z_j / d x_i = - x_i * x_j / len^3 = - z_i * z_j / len 52 | val rows = dFdX.rows 53 | val dZdX = ScalarMatrix $0(rows, rows) 54 | var r = 0 55 | while (r < rows) { 56 | //dZ_r 57 | var c = 0 58 | while (c < rows) { 59 | if (r == c) { 60 | //dX_c 61 | dZdX.update(r, c, (1.0f - output(r, 0) * output(r, 0)) / len) 62 | } else { 63 | dZdX.update(r, c, (-output(r, 0) * output(c, 0)) / len) 64 | } 65 | c += 1 66 | } 67 | r += 1 68 | } 69 | 70 | // un-normalize the error 71 | dZdX * error 72 | } 73 | 74 | /** 75 | * Forward computation 76 | * 77 | * @param x input matrix 78 | * @return output matrix 79 | */ 80 | override def apply(x: ScalarMatrix): ScalarMatrix = { 81 | val len = Math.sqrt(sum(pow(x, 2.0f))).toFloat 82 | val normalized: ScalarMatrix = x :/ len 83 | 84 | if (factor != 1.0f) 85 | normalized :* factor 86 | else 87 | normalized 88 | } 89 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/Rank3TensorLayer.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | 5 | /** 6 | * __Layer__: Basic, Fully-connected Rank 3 Tensor Layer. 7 | * 8 | * @note
9 |  *       v0 = the column vector formed by concatenating v2 after v1: (v11, v12, ..., v1in1, v21, ...)
10 |  *       Q = a rank-3 tensor with out slices; each slice is an in1 × in2 matrix.
11 |  *       L = a rank-3 tensor with out slices; each slice is a 1 × (in1 + in2) row vector (so L acts as an out × (in1 + in2) matrix).
 12 |  *       b = out × 1 matrix.
 13 |  *
 14 |  *       output = f( v1'.Q.v2 + L.v0 + b )
 15 |  *       
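 *       For example, with fanIns = (2, 3, 5) and fanOut = 4: each Q_k (k = 1..4) is a 2 × 3 matrix, L is a 4 × 5 matrix, b is 4 × 1, and the k-th entry of the pre-activation is v1'.Q_k.v2 + (L.v0)_k + b_k before f is applied.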
16 | * 17 | * @param fanIns is the number of input. (vector1, vector2, entire). 18 | * @param fanOut is the number of output 19 | * @param act is an activation function to be applied 20 | * @param quad is initial quadratic-level weight matrix Q for the case that it is restored from JSON (default: Seq()) 21 | * @param lin is initial linear-level weight matrix L for the case that it is restored from JSON (default: null) 22 | * @param const is initial bias weight matrix b for the case that it is restored from JSON (default: null) 23 | */ 24 | abstract class Rank3TensorLayer(protected val fanIns: (Int, Int, Int), 25 | protected val fanOut: Int, 26 | protected override val act: Activation, 27 | quad: Seq[ScalarMatrix] = Seq(), 28 | lin: ScalarMatrix = null, 29 | const: ScalarMatrix = null) 30 | extends Layer { 31 | /* Number of Fan-ins */ 32 | protected final val fanInA = fanIns._1 33 | protected final val fanInB = fanIns._2 34 | protected final val fanIn = fanIns._3 35 | /* Initialize weight */ 36 | protected final val quadratic: IndexedSeq[ScalarMatrix] = 37 | if (quad.nonEmpty) quad.toIndexedSeq 38 | else (0 until fanOut).map(_ ⇒ act.initialize(fanIn, fanOut, fanInA, fanInB)) 39 | protected final val linear: ScalarMatrix = if (lin != null) lin else act.initialize(fanIn, fanOut, fanOut, fanIn) 40 | protected final val bias: ScalarMatrix = if (const != null) const else act.initialize(fanIn, fanOut, fanOut, 1) 41 | 42 | /** 43 | * Retrieve first input 44 | * 45 | * @param x input to be separated 46 | * @return first input 47 | */ 48 | protected def in1(x: ScalarMatrix): ScalarMatrix 49 | 50 | /** 51 | * Retrive second input 52 | * 53 | * @param x input to be separated 54 | * @return second input 55 | */ 56 | protected def in2(x: ScalarMatrix): ScalarMatrix 57 | 58 | /** 59 | * Reconstruct error from fragments 60 | * @param in1 error of input1 61 | * @param in2 error of input2 62 | * @return restored error 63 | */ 64 | protected def restoreError(in1: ScalarMatrix, in2: ScalarMatrix): ScalarMatrix 65 | 66 | /** 67 | * Forward computation 68 | * 69 | * @param x input matrix 70 | * @return output matrix 71 | */ 72 | override def apply(x: ScalarMatrix): ScalarMatrix = { 73 | val inA = in1(x) 74 | val inB = in2(x) 75 | 76 | val intermediate: ScalarMatrix = linear * x 77 | intermediate += bias 78 | 79 | val quads = quadratic.map { q ⇒ 80 | val xQ: ScalarMatrix = inA.t * q 81 | val xQy: ScalarMatrix = xQ * inB 82 | xQy(0, 0) 83 | } 84 | intermediate += ScalarMatrix(quads: _*) 85 | 86 | act(intermediate) 87 | } 88 | 89 | /** 90 | * weights for update 91 | * 92 | * @return weights 93 | */ 94 | override val W: IndexedSeq[ScalarMatrix] = (quadratic :+ linear) :+ bias 95 | 96 | /** 97 | *

Backward computation.

98 | * 99 | * @note

100 | * Let this layer compute F(X(x)), where X(x) = x1'.Q.x2 + L.x + b, 101 | * and let the higher layers compute G. (Each output component is treated separately, except for propagation.) 102 | *

103 | * 104 | *

105 | * The weights are updated with dG/dW, 106 | * and dG/dx is propagated to the lower layer. 107 | *

108 | * 109 | *

110 | * For the computation, denominator-layout notation is used throughout (cf. the Wikipedia page on matrix calculus). 111 | * For the derivation rules, see "The Matrix Cookbook". 112 | *

113 | * 114 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 115 | * In this case, bias :: linear :: quadratic(K to 0) ::: lowerStack 116 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 117 | * @return propagated error (in this case, dG/dx ) 118 | */ 119 | def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = { 120 | val inA = in1(X) 121 | val inB = in2(X) 122 | 123 | /* 124 | * Chain Rule : dG/dX_ij = tr[ ( dG/dF ).t * dF/dX_ij ]. 125 | * 126 | * Note 1. X, dG/dF, dF/dX_ij are row vectors. Therefore tr(.) can be omitted. 127 | * 128 | * Thus, dG/dX = [ (dG/dF).t * dF/dX ].t, because [...] is 1 × fanOut matrix. 129 | * Therefore dG/dX = dF/dX * dG/dF, because dF/dX is symmetric in our case. 130 | */ 131 | val dGdX: ScalarMatrix = dFdX * error 132 | 133 | // For bias, input is always 1. We only need dG/dX 134 | delta.next += dGdX 135 | 136 | /* 137 | * Chain Rule (Linear weight case) : dG/dW_ij = tr[ ( dG/dX ).t * dX/dW_ij ]. 138 | * 139 | * dX/dW_ij is a fan-Out dimension column vector with all zero but (i, 1) = X_j. 140 | * Thus, tr(.) can be omitted, and dG/dW_ij = (dX/dW_ij).t * dG/dX 141 | * Then {j-th column of dG/dW} = X_j * dG/dX = dG/dX * X_j. 142 | * 143 | * Therefore dG/dW = dG/dX * X.t 144 | */ 145 | val dGdL = dGdX * X.t 146 | delta.next += dGdL 147 | /* 148 | * Chain Rule (Linear weight part) : dG/dx_ij = tr[ ( dG/dX ).t * dX/dx_ij ]. 149 | * 150 | * X is column vector. Thus j is always 1, so dX/dx_i is a W_?i. 151 | * Hence dG/dx_i = tr[ (dG/dX).t * dX/dx_ij ] = (W_?i).t * dG/dX. 152 | * 153 | * Thus dG/dx (linear part) = W.t * dG/dX. 154 | */ 155 | val dGdx = linear.t * dGdX 156 | 157 | /* 158 | * Because X = inA.t * Q * inB, dX/dQ = inA * inB.t 159 | */ 160 | val dXdQ: ScalarMatrix = inA * inB.t //d tr(axb)/dx = a'b' 161 | 162 | // Add dG/dx quadratic part. 163 | updateQuadratic(inA, inB, dGdX, dXdQ, dGdx, delta) 164 | } 165 | 166 | private def updateQuadratic(inA: ScalarMatrix, inB: ScalarMatrix, 167 | dGdXAll: ScalarMatrix, dXdQ: ScalarMatrix, 168 | acc: ScalarMatrix, delta: Iterator[ScalarMatrix], id: Int = fanOut - 1): ScalarMatrix = 169 | if (id >= 0) { 170 | // This is scalar 171 | val dGdX = dGdXAll(id, 0) 172 | 173 | /* 174 | * Chain Rule (Quadratic weight case) : dG/dQ_ij = tr[ ( dG/dX ).t * dX/dQ_ij ]. 175 | * 176 | * dX/dQ_ij = (inA * inB.t)_ij, and so dG/dQ_ij = (dG/dX).t * dX/dQ_ij. 177 | * They are scalar, so dG/dQ = dG/dX * dX/dQ. 178 | */ 179 | val dGdQ: ScalarMatrix = dXdQ :* dGdX 180 | delta.next += dGdQ 181 | 182 | /* 183 | * Chain Rule (Linear weight part) : dG/dx_ij = tr[ ( dG/dX ).t * dX/dx_ij ]. 184 | * 185 | * X is column vector. Thus j is always 1, so dX/dx_i is a W_?i. 186 | * Hence dG/dx_i = tr[ (dG/dX).t * dX/dx_ij ] = (W_?i).t * dG/dX. 187 | * 188 | * Thus dG/dx = W.t * dG/dX. 189 | * 190 | * Chain Rule (Quadratic weight part) : dG/dx_ij = tr[ ( dG/dX ).t * dX/dx_ij ]. 191 | * 192 | * Note that x is a column vector with inA, inB as parts. 193 | * Because X = inA.t * Q * inB, dX/dxA = inB.t * Q.t and dX/dxB = inA.t * Q 194 | * Since dG/dX is scalar, we obtain dG/dx by scalar multiplication. 
195 | */ 196 | val dXdxQ1: ScalarMatrix = inB.t * quadratic(id).t //d tr(ax')/dx = d tr(x'a)/dx = a' 197 | val dXdxQ2: ScalarMatrix = inA.t * quadratic(id) //d tr(ax)/dx = d tr(xa)/dx = a 198 | val dGdx: ScalarMatrix = restoreError(dXdxQ1, dXdxQ2) :* dGdX 199 | acc += dGdx 200 | 201 | updateQuadratic(inA, inB, dGdXAll, dXdQ, acc, delta, id - 1) 202 | } else 203 | acc 204 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/ReconBasicLayer.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import play.api.libs.json.JsObject 5 | 6 | /** 7 | * __Layer__ : Reconstructable Basic Layer 8 | * 9 | * @param IO is a pair of input & output, such as 2 -> 3 10 | * @param act is an activation function to be applied 11 | * @param w is initial weight matrix for the case that it is restored from JSON (default: null) 12 | * @param b is inital bias matrix for the case that it is restored from JSON (default: null) 13 | * @param rb is initial reconstruct bias matrix for the case that it is restored from JSON (default: null) 14 | */ 15 | class ReconBasicLayer(IO: (Int, Int), 16 | act: Activation, 17 | w: ScalarMatrix = null, 18 | b: ScalarMatrix = null, 19 | rb: ScalarMatrix = null) 20 | extends BasicLayer(IO, act, w, b) with Reconstructable { 21 | protected final val reBias = if (rb != null) rb else act initialize(fanIn, fanOut, fanIn, 1) 22 | /** 23 | * weights for update 24 | * 25 | * @return weights 26 | */ 27 | override val W: IndexedSeq[ScalarMatrix] = IndexedSeq(bias, weight, weight, reBias) 28 | 29 | /** 30 | * Sugar: reconstruction 31 | * 32 | * @param x hidden layer output matrix 33 | * @return tuple of reconstruction output 34 | */ 35 | override def decodeFrom(x: ScalarMatrix): ScalarMatrix = { 36 | val wx: ScalarMatrix = weight.t[ScalarMatrix, ScalarMatrix] * x 37 | val wxb: ScalarMatrix = wx + reBias 38 | act(wxb) 39 | } 40 | 41 | /** 42 | * Translate this layer into JSON object (in Play! framework) 43 | * 44 | * @return JSON object describes this layer 45 | */ 46 | override def toJSON: JsObject = super.toJSON + ("reconst_bias" → reBias.to2DSeq) 47 | 48 | /** 49 | * Backpropagation of reconstruction. For the information about backpropagation calculation, see [[kr.ac.kaist.ir.deep.layer.Layer]] 50 | * 51 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 52 | * In this case, reBias :: weight ::: lowerStack 53 | * @param error error matrix to be propagated 54 | * @return propagated error 55 | */ 56 | protected[deep] def decodeUpdateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = { 57 | /* 58 | * Chain Rule : dG/dX_ij = tr[ ( dG/dF ).t * dF/dX_ij ]. 59 | * 60 | * Note 1. X, dG/dF, dF/dX_ij are row vectors. Therefore tr(.) can be omitted. 61 | * 62 | * Thus, dG/dX = [ (dG/dF).t * dF/dX ].t, because [...] is 1 × fanOut matrix. 63 | * Therefore dG/dX = dF/dX * dG/dF, because dF/dX is symmetric in our case. 64 | */ 65 | val dGdX: ScalarMatrix = decdFdX * error 66 | 67 | // For bias, input is always 1. We only need dG/dX 68 | delta.next += dGdX 69 | 70 | /* 71 | * Chain Rule : dG/dW_ij = tr[ ( dG/dX ).t * dX/dW_ij ]. 72 | * 73 | * dX/dW_ij is a fan-Out dimension column vector with all zero but (i, 1) = X_j. 74 | * Thus, tr(.) can be omitted, and dG/dW_ij = (dX/dW_ij).t * dG/dX 75 | * Then {j-th column of dG/dW} = X_j * dG/dX = dG/dX * X_j. 
76 | * 77 | * Therefore dG/dW = dG/dX * X.t 78 | */ 79 | val dGdW: ScalarMatrix = dGdX * decX.t 80 | delta.next += dGdW.t // Because we used transposed weight for reconstruction, we need to transpose it. 81 | 82 | /* 83 | * Chain Rule : dG/dx_ij = tr[ ( dG/dX ).t * dX/dx_ij ]. 84 | * 85 | * X is column vector. Thus j is always 1, so dX/dx_i is a W_?i. 86 | * Hence dG/dx_i = tr[ (dG/dX).t * dX/dx_ij ] = (W_?i).t * dG/dX. 87 | * 88 | * Thus dG/dx = W.t * dG/dX 89 | */ 90 | val dGdx: ScalarMatrix = weight * dGdX // Because we used transposed weight for reconstruction. 91 | dGdx 92 | } 93 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/Reconstructable.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | 5 | /** 6 | * __Trait__ of Layer that can be used for autoencoder 7 | */ 8 | trait Reconstructable extends Layer { 9 | protected var decX: ScalarMatrix = _ 10 | protected var decdFdX: ScalarMatrix = _ 11 | /** 12 | * Reconstruction 13 | * 14 | * @param x hidden layer output matrix 15 | * @return tuple of reconstruction output 16 | */ 17 | def decodeFrom(x: ScalarMatrix): ScalarMatrix 18 | 19 | /** 20 | * Sugar: reconstruction 21 | * 22 | * @param x hidden layer output matrix 23 | * @return tuple of reconstruction output 24 | */ 25 | def decodeBy(x: ScalarMatrix): ScalarMatrix = { 26 | decX = x 27 | val out = decodeFrom(x) 28 | decdFdX = act.derivative(out) 29 | out 30 | } 31 | 32 | /** 33 | * Sugar: reconstruction 34 | * 35 | * @param x hidden layer output matrix 36 | * @return tuple of reconstruction output 37 | */ 38 | @deprecated 39 | def decodeBy_:(x: ScalarMatrix): ScalarMatrix = decodeBy(x) 40 | 41 | /** 42 | * Backpropagation of reconstruction. For the information about backpropagation calculation, see [[kr.ac.kaist.ir.deep.layer.Layer]] 43 | * 44 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 45 | * @param error error matrix to be propagated 46 | * @return propagated error 47 | */ 48 | protected[deep] def decodeUpdateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/SplitTensorLayer.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.layer 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import play.api.libs.json.{JsArray, JsObject, Json} 5 | 6 | /** 7 | * __Layer__: Basic, Fully-connected Rank 3 Tensor Layer. 8 | * 9 | * @note
10 |  *       v0 = the column vector formed by concatenating v2 after v1: (v11, v12, ..., v1in1, v21, ...)
11 |  *       Q = a rank-3 tensor with out slices; each slice is an in1 × in2 matrix.
12 |  *       L = a rank-3 tensor with out slices; each slice is a 1 × (in1 + in2) row vector (so L acts as an out × (in1 + in2) matrix).
13 |  *       b = out × 1 matrix.
14 |  *
15 |  *       output = f( v1'.Q.v2 + L.v0 + b )
16 |  *       
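 *       For example, with IO = ((2, 3) → 4): a 5 × 1 input is split into v1 = rows 0-1 and v2 = rows 2-4, each Q_k is a 2 × 3 matrix, L is 4 × 5, and b is 4 × 1.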
17 | * 18 | * @param IO is a tuple of the number of input and output, i.e. ((2, 3) → 4) 19 | * @param act is an activation function to be applied 20 | * @param quad is initial quadratic-level weight matrix Q for the case that it is restored from JSON (default: Seq()) 21 | * @param lin is initial linear-level weight matrix L for the case that it is restored from JSON (default: null) 22 | * @param const is initial bias weight matrix b for the case that it is restored from JSON (default: null) 23 | */ 24 | class SplitTensorLayer(IO: ((Int, Int), Int), 25 | protected override val act: Activation, 26 | quad: Seq[ScalarMatrix] = Seq(), 27 | lin: ScalarMatrix = null, 28 | const: ScalarMatrix = null) 29 | extends Rank3TensorLayer((IO._1._1, IO._1._2, IO._1._1 + IO._1._2), IO._2, act, quad, lin, const) { 30 | 31 | /** 32 | * Translate this layer into JSON object (in Play! framework) 33 | * 34 | * @return JSON object describes this layer 35 | */ 36 | override def toJSON: JsObject = Json.obj( 37 | "type" → "SplitTensorLayer", 38 | "in" → Json.arr(fanInA, fanInB), 39 | "out" → fanOut, 40 | "act" → act.toJSON, 41 | "quadratic" → JsArray.apply(quadratic.map(_.to2DSeq)), 42 | "linear" → linear.to2DSeq, 43 | "bias" → bias.to2DSeq 44 | ) 45 | 46 | /** 47 | * Retrieve first input 48 | * 49 | * @param x input to be separated 50 | * @return first input 51 | */ 52 | protected override def in1(x: ScalarMatrix): ScalarMatrix = x(0 until fanInA, ::) 53 | 54 | /** 55 | * Retrive second input 56 | * @param x input to be separated 57 | * @return second input 58 | */ 59 | protected override def in2(x: ScalarMatrix): ScalarMatrix = x(fanInA to -1, ::) 60 | 61 | /** 62 | * Reconstruct error from fragments 63 | * @param in1 error of input1 64 | * @param in2 error of input2 65 | * @return restored error 66 | */ 67 | override protected def restoreError(in1: ScalarMatrix, in2: ScalarMatrix): ScalarMatrix = in1 col_+ in2 68 | } 69 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/layer/package.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import play.api.libs.json.{JsObject, JsValue} 5 | 6 | import scala.reflect.runtime._ 7 | 8 | /** 9 | * Package for layer implementation 10 | */ 11 | package object layer { 12 | 13 | /** 14 | * __Trait__ that describes layer-level computation 15 | * 16 | * Layer is an instance of ScalaMatrix => ScalaMatrix function. 17 | * Therefore "layers" can be composed together. 18 | */ 19 | trait Layer extends (ScalarMatrix ⇒ ScalarMatrix) with Serializable { 20 | /** Activation Function */ 21 | protected val act: Activation 22 | protected var X: ScalarMatrix = _ 23 | protected var dFdX: ScalarMatrix = _ 24 | 25 | /** 26 | * Forward computation 27 | * 28 | * @param x input matrix 29 | * @return output matrix 30 | */ 31 | override def apply(x: ScalarMatrix): ScalarMatrix 32 | 33 | /** 34 | *

Backward computation.

35 | * 36 | * @note

37 | * Let this layer compute F(X(x)), where X(x) = W.x + b, 38 | * and let the higher layers compute G. 39 | *

40 | * 41 | *

42 | * The weights are updated with dG/dW, 43 | * and dG/dx is propagated to the lower layer. 44 | *

45 | * 46 | *

47 | * For the computation, denominator-layout notation is used throughout (cf. the Wikipedia page on matrix calculus). 48 | * For the derivation rules, see "The Matrix Cookbook". 49 | *
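 *       A minimal calling sketch (the names deltas and dGdF are illustrative, not part of the API): allocate one zero matrix per entry of [[W]], with matching shapes, as deltas: IndexedSeq[ScalarMatrix]; call passedBy(x) once so the input and its derivative are cached; then call updateBy(deltas.reverseIterator, dGdF). The returned matrix is the error to hand to the layer below, and deltas accumulates the weight gradients.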

50 | * 51 | * @param delta Sequence of delta amount of weight. The order must be the re of [[W]] 52 | * @param error to be propagated ( dG / dF is propagated from higher layer ) 53 | * @return propagated error (in this case, dG/dx ) 54 | */ 55 | def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix 56 | 57 | /** 58 | * Sugar: Forward computation. Calls apply(x) 59 | * 60 | * @param x input matrix 61 | * @return output matrix 62 | */ 63 | def passedBy(x: ScalarMatrix) = { 64 | this.X = x 65 | val out = apply(x) 66 | dFdX = 67 | if (act != null) 68 | act.derivative(out) 69 | else 70 | out 71 | out 72 | } 73 | 74 | /** 75 | * Translate this layer into JSON object (in Play! framework) 76 | * @note Please make an LayerReviver object if you're using custom layer. 77 | * In that case, please specify LayerReviver object's full class name as "__reviver__," 78 | * and fill up LayerReviver.revive method. 79 | * @return JSON object describes this layer 80 | */ 81 | def toJSON: JsObject 82 | 83 | /** 84 | * Sugar: Forward computation. Calls apply(x) 85 | * 86 | * @param x input matrix 87 | * @return output matrix 88 | */ 89 | @deprecated 90 | protected[deep] def into_:(x: ScalarMatrix) = passedBy(x) 91 | 92 | /** 93 | * weights for update 94 | * 95 | * @return weights 96 | */ 97 | val W: IndexedSeq[ScalarMatrix] 98 | } 99 | 100 | /** 101 | * __Trait__ that revives layer from JSON value 102 | */ 103 | trait LayerReviver extends Serializable { 104 | /** 105 | * Revive layer using given JSON value 106 | * @param obj JSON value to be revived 107 | * @return Revived layer. 108 | */ 109 | def revive(obj: JsValue): Layer 110 | } 111 | 112 | /** 113 | * Companion object of Layer 114 | */ 115 | object Layer extends LayerReviver { 116 | @transient val runtimeMirror = universe.synchronized(universe.runtimeMirror(getClass.getClassLoader)) 117 | 118 | /** 119 | * Load layer from JsObject 120 | * 121 | * @param obj JsObject to be parsed 122 | * @return New layer reconstructed from this object 123 | */ 124 | def apply(obj: JsValue) = { 125 | val companion = 126 | universe.synchronized { 127 | (obj \ "reviver").asOpt[String] match { 128 | case Some(clsName) ⇒ 129 | val module = runtimeMirror.staticModule(clsName) 130 | runtimeMirror.reflectModule(module).instance.asInstanceOf[LayerReviver] 131 | case None ⇒ 132 | this 133 | } 134 | } 135 | companion.revive(obj) 136 | } 137 | 138 | /** 139 | * Load layer from JsObject 140 | * 141 | * @param obj JsObject to be parsed 142 | * @return New layer reconstructed from this object 143 | */ 144 | def revive(obj: JsValue) = { 145 | val in = obj \ "in" 146 | val out = obj \ "out" 147 | val typeStr = (obj \ "type").as[String] 148 | 149 | val act = if (typeStr.endsWith("Layer")) { 150 | Activation.apply(obj \ "act") 151 | } else null 152 | 153 | val dropout = (obj \ "Dropout").asOpt[Probability] 154 | val normalize = (obj \ "Normalize").asOpt[String] 155 | 156 | typeStr match { 157 | case "NormOp" ⇒ 158 | val factor = (obj \ "factor").as[Scalar] 159 | new NormalizeOperation(factor) 160 | case "DropoutOp" ⇒ 161 | val presence = (obj \ "presence").as[Probability] 162 | new DropoutOperation(presence) 163 | case "GaussianRBF" ⇒ 164 | val w = ScalarMatrix restore (obj \ "weight").as[IndexedSeq[IndexedSeq[String]]] 165 | val c = ScalarMatrix restore (obj \ "center").as[IndexedSeq[IndexedSeq[String]]] 166 | val modifiable = (obj \ "canModifyCenter").as[Boolean] 167 | (dropout, normalize) match { 168 | case (Some(p), Some(_)) ⇒ 169 | new GaussianRBFLayer(in.as[Int], 
c, modifiable, w) with Dropout with Normalize withProbability p 170 | case (Some(p), None) ⇒ 171 | new GaussianRBFLayer(in.as[Int], c, modifiable, w) with Dropout withProbability p 172 | case (None, Some(_)) ⇒ 173 | new GaussianRBFLayer(in.as[Int], c, modifiable, w) with Normalize 174 | case _ ⇒ 175 | new GaussianRBFLayer(in.as[Int], c, modifiable, w) 176 | } 177 | 178 | case "BasicLayer" ⇒ 179 | val i = in.as[Int] 180 | val o = out.as[Int] 181 | val b = ScalarMatrix restore (obj \ "bias").as[IndexedSeq[IndexedSeq[String]]] 182 | val w = ScalarMatrix restore (obj \ "weight").as[IndexedSeq[IndexedSeq[String]]] 183 | (obj \ "reconst_bias").asOpt[IndexedSeq[IndexedSeq[String]]] match { 184 | case Some(rbraw) ⇒ 185 | val rb = ScalarMatrix restore rbraw 186 | (dropout, normalize) match { 187 | case (Some(p), Some(_)) ⇒ 188 | new ReconBasicLayer(i → o, act, w, b, rb) with Dropout with Normalize withProbability p 189 | case (Some(p), None) ⇒ 190 | new ReconBasicLayer(i → o, act, w, b, rb) with Dropout withProbability p 191 | case (None, Some(_)) ⇒ 192 | new ReconBasicLayer(i → o, act, w, b, rb) with Normalize 193 | case _ ⇒ 194 | new ReconBasicLayer(i → o, act, w, b, rb) 195 | } 196 | case None ⇒ 197 | (dropout, normalize) match { 198 | case (Some(p), Some(_)) ⇒ 199 | new BasicLayer(i → o, act, w, b) with Dropout with Normalize withProbability p 200 | case (Some(p), None) ⇒ 201 | new BasicLayer(i → o, act, w, b) with Dropout withProbability p 202 | case (None, Some(_)) ⇒ 203 | new BasicLayer(i → o, act, w, b) with Normalize 204 | case _ ⇒ 205 | new BasicLayer(i → o, act, w, b) 206 | } 207 | } 208 | 209 | case "LowerTriangularLayer" ⇒ 210 | val i = in.as[Int] 211 | val o = out.as[Int] 212 | val b = ScalarMatrix restore (obj \ "bias").as[IndexedSeq[IndexedSeq[String]]] 213 | val w = ScalarMatrix restore (obj \ "weight").as[IndexedSeq[IndexedSeq[String]]] 214 | 215 | (dropout, normalize) match { 216 | case (Some(p), Some(_)) ⇒ 217 | new LowerTriangularLayer(i → o, act, w, b) with Dropout with Normalize withProbability p 218 | case (Some(p), None) ⇒ 219 | new LowerTriangularLayer(i → o, act, w, b) with Dropout withProbability p 220 | case (None, Some(_)) ⇒ 221 | new LowerTriangularLayer(i → o, act, w, b) with Normalize 222 | case _ ⇒ 223 | new LowerTriangularLayer(i → o, act, w, b) 224 | } 225 | 226 | case "SplitTensorLayer" ⇒ 227 | val tuple = in.as[Seq[Int]] 228 | val i = (tuple.head, tuple(1)) 229 | val o = out.as[Int] 230 | val b = ScalarMatrix restore (obj \ "bias").as[IndexedSeq[IndexedSeq[String]]] 231 | val quad = (obj \ "quadratic").as[Seq[IndexedSeq[IndexedSeq[String]]]] map ScalarMatrix.restore 232 | val linear = 233 | try { 234 | ScalarMatrix restore (obj \ "linear").as[IndexedSeq[IndexedSeq[String]]] 235 | } catch { 236 | case _: Throwable ⇒ 237 | (obj \ "linear").as[Seq[IndexedSeq[IndexedSeq[String]]]].map(ScalarMatrix.restore) 238 | .zipWithIndex.foldLeft(ScalarMatrix.$0(out.as[Int], tuple.sum)) { 239 | case (matx, (row, id)) ⇒ 240 | matx(id to id, ::) := row 241 | matx 242 | } 243 | } 244 | 245 | (dropout, normalize) match { 246 | case (Some(p), Some(_)) ⇒ 247 | new SplitTensorLayer(i → o, act, quad, linear, b) with Dropout with Normalize withProbability p 248 | case (Some(p), None) ⇒ 249 | new SplitTensorLayer(i → o, act, quad, linear, b) with Dropout withProbability p 250 | case (None, Some(_)) ⇒ 251 | new SplitTensorLayer(i → o, act, quad, linear, b) with Normalize 252 | case _ ⇒ 253 | new SplitTensorLayer(i → o, act, quad, linear, b) 254 | } 255 | case "FullTensorLayer" ⇒ 256 | 
val i = in.as[Int] 257 | val o = out.as[Int] 258 | val b = ScalarMatrix restore (obj \ "bias").as[IndexedSeq[IndexedSeq[String]]] 259 | val quad = (obj \ "quadratic").as[Seq[IndexedSeq[IndexedSeq[String]]]] map ScalarMatrix.restore 260 | val linear = 261 | try { 262 | ScalarMatrix restore (obj \ "linear").as[IndexedSeq[IndexedSeq[String]]] 263 | } catch { 264 | case _: Throwable ⇒ 265 | (obj \ "linear").as[Seq[IndexedSeq[IndexedSeq[String]]]].map(ScalarMatrix.restore) 266 | .zipWithIndex.foldLeft(ScalarMatrix.$0(out.as[Int], in.as[Int])) { 267 | case (matx, (row, id)) ⇒ 268 | matx(id to id, ::) := row 269 | matx 270 | } 271 | } 272 | 273 | (dropout, normalize) match { 274 | case (Some(p), Some(_)) ⇒ 275 | new FullTensorLayer(i → o, act, quad, linear, b) with Dropout with Normalize withProbability p 276 | case (Some(p), None) ⇒ 277 | new FullTensorLayer(i → o, act, quad, linear, b) with Dropout withProbability p 278 | case (None, Some(_)) ⇒ 279 | new FullTensorLayer(i → o, act, quad, linear, b) with Normalize 280 | case _ ⇒ 281 | new FullTensorLayer(i → o, act, quad, linear, b) 282 | } 283 | } 284 | } 285 | } 286 | 287 | } 288 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/network/AutoEncoder.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.network 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.layer.Reconstructable 5 | import play.api.libs.json.Json 6 | 7 | /** 8 | * __Network__: Single-layer Autoencoder 9 | * 10 | * @param layer A __reconstructable__ layer for this network 11 | * @param presence the probability of non-dropped neurons (for drop-out training). `(default : 100% = 1.0)` 12 | */ 13 | class AutoEncoder(val layer: Reconstructable, 14 | private val presence: Probability = 1.0f) 15 | extends Network { 16 | /** 17 | * All weights of layers 18 | * 19 | * @return all weights of layers 20 | */ 21 | override val W: IndexedSeq[ScalarMatrix] = layer.W 22 | 23 | /** 24 | * Compute output of neural network with given input (without reconstruction) 25 | * If drop-out is used, to average drop-out effect, we need to multiply output by presence probability. 26 | * 27 | * @param in an input vector 28 | * @return output of the vector 29 | */ 30 | override def apply(in: ScalarMatrix): ScalarMatrix = layer(in) 31 | 32 | /** 33 | * Serialize network to JSON 34 | * 35 | * @return JsObject of this network 36 | */ 37 | override def toJSON = Json.obj( 38 | "type" → this.getClass.getSimpleName, 39 | "presence" → presence.safe, 40 | "layers" → Json.arr(layer.toJSON) 41 | ) 42 | 43 | /** 44 | * Reconstruct the given hidden value 45 | * 46 | * @param x hidden value to be reconstructed. 47 | * @return reconstruction value. 48 | */ 49 | def reconstruct(x: ScalarMatrix): ScalarMatrix = layer.decodeBy(x) 50 | 51 | /** 52 | * Backpropagation algorithm 53 | * 54 | * @param delta Sequence of delta amount of weight. 
The order must be the reverse of [[W]] 55 | * @param err backpropagated error from error function 56 | */ 57 | override def updateBy(delta: Iterator[ScalarMatrix], err: ScalarMatrix): ScalarMatrix = { 58 | val e = decode_!(delta)(err) 59 | encode_!(delta)(e) 60 | } 61 | 62 | /** 63 | * Backpropagation algorithm for decoding phrase 64 | * 65 | * @param err backpropagated error from error function 66 | */ 67 | def decode_!(delta: Iterator[ScalarMatrix])(err: ScalarMatrix) = { 68 | layer decodeUpdateBy(delta, err) 69 | } 70 | 71 | /** 72 | * Backpropagation algorithm for encoding phrase 73 | * 74 | * @param err backpropagated error from error function 75 | */ 76 | def encode_!(delta: Iterator[ScalarMatrix])(err: ScalarMatrix) = { 77 | layer updateBy(delta, err) 78 | } 79 | 80 | /** 81 | * Forward computation for training. 82 | * If drop-out is used, we need to drop-out entry of input vector. 83 | * 84 | * @param x input matrix 85 | * @return output matrix 86 | */ 87 | override def passedBy(x: ScalarMatrix): ScalarMatrix = decode(encode(x)) 88 | 89 | /** 90 | * Encode computation for training. 91 | * If drop-out is used, we need to drop-out entry of input vector. 92 | * 93 | * @param x input matrix 94 | * @return hidden values 95 | */ 96 | def encode(x: ScalarMatrix): ScalarMatrix = { 97 | layer.passedBy(x) 98 | } 99 | 100 | /** 101 | * Decode computation for training. 102 | * If drop-out is used, we need to drop-out entry of input vector. 103 | * 104 | * @param x hidden values 105 | * @return output matrix 106 | */ 107 | def decode(x: ScalarMatrix): ScalarMatrix = { 108 | layer.decodeBy(x) 109 | } 110 | 111 | /** 112 | * Sugar: Forward computation for validation. Calls apply(x) 113 | * 114 | * @param x input matrix 115 | * @return output matrix 116 | */ 117 | override def of(x: ScalarMatrix): ScalarMatrix = { 118 | layer.decodeFrom(layer(x)) 119 | } 120 | } 121 | 122 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/network/BasicNetwork.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.network 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.layer.Layer 5 | import play.api.libs.json.{JsArray, Json} 6 | 7 | /** 8 | * __Network__: A basic network implementation 9 | * @param layers __Sequence of layers__ of this network 10 | */ 11 | class BasicNetwork(val layers: IndexedSeq[Layer]) 12 | extends Network { 13 | /** 14 | * All weights of layers 15 | * 16 | * @return all weights of layers 17 | */ 18 | override val W: IndexedSeq[ScalarMatrix] = layers flatMap (_.W) 19 | 20 | /** 21 | * Compute output of neural network with given input 22 | * If drop-out is used, to average drop-out effect, we need to multiply output by presence probability. 23 | * 24 | * @param in an input vector 25 | * @return output of the vector 26 | */ 27 | override def apply(in: ScalarMatrix): ScalarMatrix = { 28 | layers.foldLeft(in) { 29 | case (v, l) ⇒ l apply v 30 | } 31 | } 32 | 33 | /** 34 | * Serialize network to JSON 35 | * 36 | * @return JsObject of this network 37 | */ 38 | override def toJSON = Json.obj( 39 | "type" → this.getClass.getSimpleName, 40 | "layers" → JsArray(layers map (_.toJSON)) 41 | ) 42 | 43 | /** 44 | * Backpropagation algorithm 45 | * 46 | * @param delta Sequence of delta amount of weight. 
The order must be the reverse of [[W]] 47 | * @param err backpropagated error from error function 48 | */ 49 | override def updateBy(delta: Iterator[ScalarMatrix], err: ScalarMatrix): ScalarMatrix = { 50 | layers.foldRight(err) { 51 | case (l, e) ⇒ l updateBy(delta, e) 52 | } 53 | } 54 | 55 | /** 56 | * Forward computation for training. 57 | * If drop-out is used, we need to drop-out entry of input vector. 58 | * 59 | * @param x input matrix 60 | * @return output matrix 61 | */ 62 | override def passedBy(x: ScalarMatrix): ScalarMatrix = { 63 | layers.foldLeft(x) { 64 | case (v, l) ⇒ l passedBy v 65 | } 66 | } 67 | } 68 | 69 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/network/StackedAutoEncoder.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.network 2 | 3 | import kr.ac.kaist.ir.deep.fn.ScalarMatrix 4 | import play.api.libs.json.{JsObject, Json} 5 | 6 | import scala.collection.mutable.ArrayBuffer 7 | 8 | /** 9 | * __Network__: Stack of autoencoders. 10 | * 11 | * @param encoders __Sequence of AutoEncoders__ to be stacked. 12 | */ 13 | class StackedAutoEncoder(val encoders: Seq[AutoEncoder]) extends Network { 14 | /** 15 | * All weights of layers 16 | * 17 | * @return all weights of layers 18 | */ 19 | override val W: IndexedSeq[ScalarMatrix] = { 20 | val matrices = ArrayBuffer[ScalarMatrix]() 21 | encoders.flatMap(_.W).foreach(matrices += _) 22 | matrices 23 | } 24 | 25 | /** 26 | * Serialize network to JSON 27 | * 28 | * @return JsObject of this network 29 | */ 30 | override def toJSON: JsObject = 31 | Json.obj( 32 | "type" → this.getClass.getSimpleName, 33 | "stack" → Json.arr(encoders map (_.toJSON)) 34 | ) 35 | 36 | /** 37 | * Compute output of neural network with given input (without reconstruction) 38 | * If drop-out is used, to average drop-out effect, we need to multiply output by presence probability. 39 | * 40 | * @param in an input vector 41 | * @return output of the vector 42 | */ 43 | override def apply(in: ScalarMatrix): ScalarMatrix = { 44 | encoders.foldLeft(in) { 45 | case (v, l) ⇒ l apply v 46 | } 47 | } 48 | 49 | /** 50 | * Sugar: Forward computation for training. Calls apply(x) 51 | * 52 | * @param x input matrix 53 | * @return output matrix 54 | */ 55 | override def passedBy(x: ScalarMatrix): ScalarMatrix = { 56 | encoders.foldLeft(x) { 57 | case (v, l) ⇒ l passedBy v 58 | } 59 | } 60 | 61 | /** 62 | * Backpropagation algorithm 63 | * 64 | * @param delta Sequence of delta amount of weight. 
The order must be the reverse of [[W]] 65 | * @param err backpropagated error from error function 66 | */ 67 | override def updateBy(delta: Iterator[ScalarMatrix], err: ScalarMatrix): ScalarMatrix = { 68 | encoders.foldRight(err) { 69 | case (l, e) ⇒ l updateBy(delta, e) 70 | } 71 | } 72 | } 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/network/package.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep 2 | 3 | import java.io.Serializable 4 | 5 | import kr.ac.kaist.ir.deep.fn.{Activation, Probability, ScalarMatrix} 6 | import kr.ac.kaist.ir.deep.layer.{BasicLayer, Layer, Reconstructable} 7 | import play.api.libs.json.{JsArray, JsObject, JsValue, Json} 8 | 9 | import scala.collection.mutable.ArrayBuffer 10 | import scala.io.Codec 11 | import scala.reflect.io.{File, Path} 12 | import scala.reflect.runtime.universe 13 | 14 | /** 15 | * Package for network structure 16 | */ 17 | package object network { 18 | 19 | /** 20 | * __Trait__: Network interface 21 | */ 22 | trait Network extends (ScalarMatrix ⇒ ScalarMatrix) with Serializable { 23 | /** 24 | * All weights of layers 25 | * 26 | * @return all weights of layers 27 | */ 28 | val W: IndexedSeq[ScalarMatrix] 29 | 30 | /** 31 | * Serialize network to JSON 32 | * @note Please make an NetReviver object if you're using custom network. 33 | * In that case, please specify NetReviver object's full class name as "__reviver__," 34 | * and fill up NetReviver.revive method. 35 | * 36 | * @return JsObject of this network 37 | */ 38 | def toJSON: JsObject 39 | 40 | /** 41 | * Backpropagation algorithm 42 | * 43 | * @param delta Sequence of delta amount of weight. The order must be the reverse of [[W]] 44 | * @param err backpropagated error from error function 45 | */ 46 | def updateBy(delta: Iterator[ScalarMatrix], err: ScalarMatrix): ScalarMatrix 47 | 48 | /** 49 | * Forward computation for training 50 | * 51 | * @param x input matrix 52 | * @return output matrix 53 | */ 54 | def passedBy(x: ScalarMatrix): ScalarMatrix 55 | 56 | /** 57 | * Forward computation for training 58 | * 59 | * @param x input matrix 60 | * @return output matrix 61 | */ 62 | @deprecated 63 | def into_:(x: ScalarMatrix): ScalarMatrix = passedBy(x) 64 | 65 | /** 66 | * Sugar: Forward computation for validation. Calls apply(x) 67 | * 68 | * @param x input matrix 69 | * @return output matrix 70 | */ 71 | def of(x: ScalarMatrix): ScalarMatrix = apply(x) 72 | 73 | /** 74 | * Save given network into given file. 75 | * @param path Path to save this network. 76 | * @param codec Codec used for writer. `(Default: Codec.UTF8)` 77 | */ 78 | def saveAsJsonFile(path: Path, codec: Codec = Codec.UTF8): Unit = { 79 | val writer = File(path).bufferedWriter(append = false, codec = codec) 80 | writer.write(Json.prettyPrint(this.toJSON)) 81 | writer.close() 82 | } 83 | } 84 | 85 | /** 86 | * __Trait__ of Network Reviver (Companion) objects 87 | */ 88 | trait NetReviver extends Serializable { 89 | /** 90 | * Revive network using given JSON value 91 | * @param obj JSON value to be revived 92 | * @return Revived network. 
93 | */ 94 | def revive(obj: JsValue): Network 95 | } 96 | 97 | /** 98 | * Companion object of BasicNetwork 99 | */ 100 | object Network extends NetReviver { 101 | @transient lazy val runtimeMirror = universe.synchronized(universe.runtimeMirror(getClass.getClassLoader)) 102 | /** 103 | * Construct network from given layer size information 104 | * 105 | * @param act Activation function for activation function 106 | * @param layerSizes Sizes for construct layers 107 | */ 108 | def apply(act: Activation, layerSizes: Int*): Network = { 109 | val layers = ArrayBuffer[Layer]() 110 | layers ++= layerSizes.indices.tail.map { 111 | i ⇒ new BasicLayer(layerSizes(i - 1) → layerSizes(i), act) 112 | } 113 | new BasicNetwork(layers) 114 | } 115 | 116 | /** 117 | * Load network from given file. 118 | * @param path Path to save this network. 119 | * @param codec Codec used for writer. `(Default: Codec.UTF8)` 120 | * 121 | * @tparam T Type of network casted into. 122 | */ 123 | def jsonFile[T >: Network](path: Path, codec: Codec = Codec.UTF8): T = { 124 | val line = File(path).lines(codec).mkString("") 125 | val json = Json.parse(line) 126 | apply(json).asInstanceOf[T] 127 | } 128 | 129 | /** 130 | * Load network from JsObject 131 | * 132 | * @param obj JsObject to be parsed 133 | * @return New Network reconstructed from this object 134 | */ 135 | def apply(obj: JsValue): Network = { 136 | val companion = 137 | universe.synchronized { 138 | (obj \ "reviver").asOpt[String] match { 139 | case Some(clsName) ⇒ 140 | val module = runtimeMirror.staticModule(clsName) 141 | runtimeMirror.reflectModule(module).instance.asInstanceOf[NetReviver] 142 | case None ⇒ 143 | this 144 | } 145 | } 146 | companion.revive(obj) 147 | } 148 | 149 | /** 150 | * Revive network using given JSON value 151 | * @param obj JSON value to be revived 152 | * @return Revived network. 
153 | */ 154 | override def revive(obj: JsValue): Network = { 155 | (obj \ "type").as[String] match { 156 | case "AutoEncoder" ⇒ AutoEncoder(obj) 157 | case "BasicNetwork" ⇒ BasicNetwork(obj) 158 | case "StackedAutoEncoder" ⇒ StackedAutoEncoder(obj) 159 | } 160 | } 161 | 162 | /** 163 | * Load network from JsObject 164 | * 165 | * @param obj JsObject to be parsed 166 | * @return New AutoEncoder reconstructed from this object 167 | */ 168 | def AutoEncoder(obj: JsValue): AutoEncoder = { 169 | val layers = (obj \ "layers").as[JsArray].value map Layer.apply 170 | val presence = (obj \ "presence").as[Probability] 171 | new AutoEncoder(layers.head.asInstanceOf[Reconstructable], presence) 172 | } 173 | 174 | /** 175 | * Load network from JsObject 176 | * 177 | * @param obj JsObject to be parsed 178 | * @return New Basic Network reconstructed from this object 179 | */ 180 | def BasicNetwork(obj: JsValue): BasicNetwork = { 181 | val layers = ArrayBuffer[Layer]() 182 | layers ++= (obj \ "layers").as[JsArray].value.map(Layer.apply) 183 | new BasicNetwork(layers) 184 | } 185 | 186 | /** 187 | * Load network from JsObject 188 | * 189 | * @param obj JsObject to be parsed 190 | * @return New Stacked AutoEncoder reconstructed from this object 191 | */ 192 | def StackedAutoEncoder(obj: JsValue): StackedAutoEncoder = { 193 | val layers = (obj \ "stack").as[Seq[JsObject]] map Network.AutoEncoder 194 | new StackedAutoEncoder(layers) 195 | } 196 | 197 | } 198 | 199 | } 200 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/package.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir 2 | 3 | /** 4 | * A ''Neural Network implementation'' with Scala, [[https://github.com/scalanlp/breeze Breeze]] & [[http://spark.apache.org Spark]] 5 | * 6 | * @example 7 | * {{{// Define 2 -> 4 -> 1 Layered, Fully connected network. 8 | * val net = Network(Sigmoid, 2, 4, 1) 9 | * 10 | * // Define Manipulation Type. VectorType, AEType, RAEType, StandardRAEType, URAEType, and StringToVectorType. 11 | * val operation = new VectorType( 12 | * corrupt = GaussianCorruption(variance = 0.1) 13 | * ) 14 | * 15 | * // Define Training Style. 
SingleThreadTrainStyle vs DistBeliefTrainStyle 16 | * val style = new SingleThreadTrainStyle( 17 | * net = net, 18 | * algorithm = new StochasticGradientDescent(l2decay = 0.0001), 19 | * make = operation, 20 | * param = SimpleTrainingCriteria(miniBatchFraction = 0.01)) 21 | * 22 | * // Define Trainer 23 | * val train = new Trainer( 24 | * style = style, 25 | * stops = StoppingCriteria(maxIter = 100000)) 26 | * 27 | * // Do Train 28 | * train.train(set, valid)}}} 29 | */ 30 | package object deep 31 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/rec/BinaryTree.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.rec 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.train.Corruption 5 | 6 | /** 7 | * __Node__ for internal structure (non-terminal) 8 | */ 9 | class BinaryTree(val left: Node, right: Node) extends Node { 10 | 11 | /** 12 | * Forward computation of Binary Tree 13 | * 14 | * @param fn function to be applied 15 | * @return the result 16 | */ 17 | override def forward(fn: ScalarMatrix ⇒ ScalarMatrix): ScalarMatrix = { 18 | val leftMatx = left.forward(fn) 19 | val rightMatx = right.forward(fn) 20 | fn(leftMatx row_+ rightMatx) 21 | } 22 | 23 | /** 24 | * Backward computation of Binary Tree 25 | * 26 | * @param err Matrix to be propagated 27 | * @param fn function to be applied 28 | * @return Sequence of terminal nodes 29 | */ 30 | def backward(err: ScalarMatrix, fn: ScalarMatrix ⇒ ScalarMatrix): Seq[Leaf] = { 31 | val error = fn(err) 32 | val rSize = error.rows / 2 33 | 34 | val seqLeft = left.backward(error(0 until rSize, ::), fn) 35 | val seqRight = right.backward(error(rSize to -1, ::), fn) 36 | seqLeft ++ seqRight 37 | } 38 | 39 | /** 40 | * Corrupt this node 41 | * * 42 | * @param corrupt Corruption function to be applied 43 | * @return Corrupted Binary Tree 44 | */ 45 | override def through(corrupt: Corruption): Node = 46 | new BinaryTree(left through corrupt, right through corrupt) 47 | 48 | /** 49 | * Replace wildcard node 50 | * @param resolve Wildcard Resolver function 51 | * @return new Node without wildcard 52 | */ 53 | override def ?(resolve: (Int) ⇒ Node): Node = { 54 | val newLeft = left ? resolve 55 | val newRight = right ? resolve 56 | 57 | if (left.equals(newLeft) && right.equals(newRight)) 58 | this 59 | else 60 | new BinaryTree(newLeft, newRight) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/rec/Leaf.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.rec 2 | 3 | import kr.ac.kaist.ir.deep.fn.ScalarMatrix 4 | import kr.ac.kaist.ir.deep.train.Corruption 5 | 6 | /** 7 | * __Node of BinaryTree__ whose position is terminal. 8 | * 9 | * This node does not do any computation. 
10 | * 11 | * @param x original value matrix 12 | */ 13 | class Leaf(val x: ScalarMatrix) extends Node { 14 | var out: ScalarMatrix = x 15 | 16 | /** 17 | * Forward computation of Binary Tree 18 | * 19 | * @param fn function to be applied 20 | * @return the result 21 | */ 22 | override def forward(fn: ScalarMatrix ⇒ ScalarMatrix): ScalarMatrix = out 23 | 24 | /** 25 | * Backward computation of Binary Tree 26 | * 27 | * @param err Matrix to be propagated 28 | * @param fn function to be applied 29 | * @return Sequence of terminal nodes 30 | */ 31 | def backward(err: ScalarMatrix, fn: ScalarMatrix ⇒ ScalarMatrix): Seq[Leaf] = { 32 | out = err 33 | Seq(this) 34 | } 35 | 36 | /** 37 | * Corrupt this node 38 | * * 39 | * @param corrupt Corruption function to be applied 40 | * @return Corrupted Binary Tree 41 | */ 42 | override def through(corrupt: Corruption): Node = 43 | new Leaf(corrupt(x)) 44 | 45 | /** 46 | * Replace wildcard node 47 | * @param resolve Wildcard Resolver function 48 | * @return new Node without wildcard 49 | */ 50 | override def ?(resolve: (Int) ⇒ Node): Node = this 51 | } 52 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/rec/Node.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.rec 2 | 3 | import kr.ac.kaist.ir.deep.fn.ScalarMatrix 4 | import kr.ac.kaist.ir.deep.train.Corruption 5 | 6 | /** 7 | * __Trait__ that describes a node in BinaryTree. 8 | */ 9 | trait Node extends Serializable { 10 | /** 11 | * Forward computation of Binary Tree 12 | * 13 | * @param fn function to be applied 14 | * @return the result 15 | */ 16 | def forward(fn: ScalarMatrix ⇒ ScalarMatrix): ScalarMatrix 17 | 18 | /** 19 | * Backward computation of Binary Tree 20 | * 21 | * @param err Matrix to be propagated 22 | * @param fn function to be applied 23 | * @return Sequence of terminal nodes 24 | */ 25 | def backward(err: ScalarMatrix, fn: ScalarMatrix ⇒ ScalarMatrix): Seq[Leaf] 26 | 27 | /** 28 | * Corrupt this node 29 | * * 30 | * @param corrupt Corruption function to be applied 31 | * @return Corrupted Binary Tree 32 | */ 33 | def through(corrupt: Corruption): Node 34 | 35 | /** 36 | * Replace wildcard node 37 | * @param resolve Wildcard Resolver function 38 | * @return new Node without wildcard 39 | */ 40 | def ?(resolve: Int ⇒ Node): Node 41 | } 42 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/rec/WildcardLeaf.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.rec 2 | 3 | import kr.ac.kaist.ir.deep.fn.ScalarMatrix 4 | import kr.ac.kaist.ir.deep.train.Corruption 5 | 6 | /** 7 | * __Node of BinaryTree__ whose position is terminal. 8 | * 9 | * This node does not do any computation. 
10 | * 11 | * @param id ID of wildcard entry 12 | */ 13 | class WildcardLeaf(val id: Int) extends Node { 14 | /** 15 | * Forward computation of Binary Tree 16 | * 17 | * @param fn function to be applied 18 | * @return the result 19 | */ 20 | override def forward(fn: ScalarMatrix ⇒ ScalarMatrix): ScalarMatrix = null 21 | 22 | /** 23 | * Backward computation of Binary Tree 24 | * 25 | * @param err Matrix to be propagated 26 | * @param fn function to be applied 27 | * @return Sequence of terminal nodes 28 | */ 29 | def backward(err: ScalarMatrix, fn: ScalarMatrix ⇒ ScalarMatrix): Seq[Leaf] = Seq() 30 | 31 | /** 32 | * Corrupt this node 33 | * * 34 | * @param corrupt Corruption function to be applied 35 | * @return Corrupted Binary Tree 36 | */ 37 | override def through(corrupt: Corruption): Node = this 38 | 39 | /** 40 | * Replace wildcard node 41 | * @param resolve Wildcard Resolver function 42 | * @return new Node without wildcard 43 | */ 44 | override def ?(resolve: (Int) ⇒ Node): Node = resolve(id) 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/rec/package.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep 2 | 3 | /** 4 | * Package object for DAG 5 | */ 6 | package object rec 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/AEType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.network.Network 5 | 6 | /** 7 | * __Input Operation__ : Vector as Input & Auto Encoder Training (no output type) 8 | * 9 | * @param corrupt Corruption that supervises how to corrupt the input matrix. (Default : [[NoCorruption]]) 10 | * @param error An objective function (Default: [[kr.ac.kaist.ir.deep.fn.SquaredErr]]) 11 | * 12 | * @example 13 | * {{{var make = new AEType(error = CrossEntropyErr) 14 | * var corruptedIn = make corrupted in 15 | * var out = make onewayTrip (net, corruptedIn)}}} 16 | */ 17 | class AEType(override val corrupt: Corruption = NoCorruption, 18 | override val error: Objective = SquaredErr) 19 | extends ManipulationType[ScalarMatrix, Null] { 20 | 21 | /** 22 | * Corrupt input 23 | * 24 | * @param x input to be corrupted 25 | * @return corrupted input 26 | */ 27 | override def corrupted(x: ScalarMatrix): ScalarMatrix = corrupt(x) 28 | 29 | /** 30 | * Apply & Back-prop given single input 31 | * 32 | * @param net A network that gets input 33 | * @param delta Sequence of delta updates 34 | */ 35 | def roundTrip(net: Network, delta: Seq[ScalarMatrix]) = (in: ScalarMatrix, real: Null) ⇒ { 36 | val out = net passedBy in 37 | val err: ScalarMatrix = error.derivative(in, out) 38 | net updateBy(delta.toIterator, err) 39 | } 40 | 41 | /** 42 | * Apply given input and compute the error 43 | * 44 | * @param net A network that gets input 45 | * @param pair (Input, Real output) for error computation. 46 | * @return error of this network 47 | */ 48 | def lossOf(net: Network)(pair: (ScalarMatrix, Null)): Scalar = { 49 | val in = pair._1 50 | val out = net of in 51 | error(in, out) 52 | } 53 | 54 | /** 55 | * Apply given single input as one-way forward trip. 56 | * 57 | * @param net A network that gets input 58 | * @param x input to be computed 59 | * @return output of the network. 
60 | */ 61 | override def onewayTrip(net: Network, x: ScalarMatrix): ScalarMatrix = net of x 62 | 63 | 64 | /** 65 | * Make validation output 66 | * 67 | * @return input as string 68 | */ 69 | def stringOf(net: Network, pair: (ScalarMatrix, Null)): String = { 70 | val in = pair._1 71 | val out = net of in 72 | s"IN: ${in.mkString} RECON → OUT: ${out.mkString}" 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/DistBeliefTrainStyle.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.network._ 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.broadcast.Broadcast 7 | 8 | import scala.collection.mutable.ArrayBuffer 9 | import scala.concurrent.ExecutionContext.Implicits.global 10 | import scala.concurrent._ 11 | import scala.reflect._ 12 | 13 | /** 14 | * __Train Style__ : Semi-DistBelief Style, Spark-based. 15 | * 16 | * @note Unlike with DistBelief, this trainer do updates and fetch by '''master''' not the '''workers'''. 17 | * 18 | * @param net __Network__ to be trained 19 | * @param algorithm Weight __update algorithm__ to be applied 20 | * @param sc A __spark context__ that network will be distributed 21 | * @param make __Input Operation__ that supervises how to manipulate input as matrices. 22 | * This also controls how to compute actual network. (default: [[VectorType]]) 23 | * @param param __DistBelief-style__ Training criteria (default: [[DistBeliefCriteria]]) 24 | */ 25 | class DistBeliefTrainStyle[IN: ClassTag, OUT: ClassTag](net: Network, 26 | algorithm: WeightUpdater, 27 | @transient sc: SparkContext, 28 | make: ManipulationType[IN, OUT] = new VectorType(), 29 | param: DistBeliefCriteria = DistBeliefCriteria()) 30 | extends MultiThreadTrainStyle[IN, OUT](net, algorithm, sc, make, param) { 31 | /** Flag for batch : Is Batch remaining? */ 32 | @transient protected var batchFlag = ArrayBuffer[Future[Unit]]() 33 | /** Flag for fetch : Is fetching? */ 34 | @transient protected var fetchFlag: Future[Unit] = null 35 | /** Flag for update : Is updating? */ 36 | @transient protected var updateFlag: Future[Unit] = null 37 | /** Spark distributed networks */ 38 | protected var bcNet: Broadcast[Network] = _ 39 | 40 | /** 41 | * Fetch weights 42 | * 43 | * @param iter current iteration 44 | */ 45 | override def fetch(iter: Int): Unit = 46 | if (iter % param.fetchStep == 0) { 47 | if (fetchFlag != null && !fetchFlag.isCompleted) { 48 | logger warn "Fetch command arrived before previous fetch is done. Need more steps between fetch commands!" 49 | } 50 | 51 | fetchFlag = 52 | future { 53 | val oldNet = bcNet 54 | bcNet = sc.broadcast(net) 55 | 56 | // Because DistBelief submit fetching job after n_fetch steps, 57 | // submit this fetch after already submitted jobs are done. 58 | // This does not block others because batch can be submitted anyway, 59 | // and that batch does not affect this thread. 
60 | stopUntilBatchFinished() 61 | 62 | future { 63 | Thread.sleep(param.submitInterval.toMillis * param.fetchStep) 64 | oldNet.destroy() 65 | } 66 | } 67 | } 68 | 69 | /** 70 | * Non-blocking pending, until all assigned batches are finished 71 | */ 72 | override def stopUntilBatchFinished(): Unit = { 73 | AsyncAwait.readyAll(param.submitInterval, batchFlag: _*) 74 | batchFlag = batchFlag.filterNot(_.isCompleted) 75 | } 76 | 77 | /** 78 | * Send update of weights 79 | * 80 | * @param iter current iteration 81 | */ 82 | override def update(iter: Int): Unit = 83 | if (iter % param.updateStep == 0) { 84 | if (updateFlag != null && !updateFlag.isCompleted) { 85 | logger warn "Update command arrived before previous update is done. Need more steps between update commands!" 86 | } 87 | 88 | updateFlag = 89 | future { 90 | // Because DistBelief submit updating job after n_update steps, 91 | // Submit this update after already submitted jobs are done. 92 | // This does not block others because batch can be submitted anyway, 93 | // and that batch does not affect this thread. 94 | stopUntilBatchFinished() 95 | 96 | val dWUpdate = accNet.value.reverse 97 | accNet.setValue(WeightAccumulator.zero(accNet.zero)) 98 | val count = accCount.value 99 | accCount.setValue(0) 100 | 101 | dWUpdate :/= count.toFloat 102 | net.W -= dWUpdate 103 | } 104 | } 105 | 106 | /** 107 | * Indicates whether the asynchrononus update is finished or not. 108 | * 109 | * @return future object of update 110 | */ 111 | override def isUpdateFinished: Future[_] = updateFlag 112 | 113 | /** 114 | * Do mini-batch 115 | */ 116 | override def batch(): Unit = { 117 | val part = partFunction(bcNet) 118 | val x = if (param.miniBatchFraction > 0) { 119 | val rddSet = trainingSet.sample(withReplacement = true, fraction = param.miniBatchFraction) 120 | .repartition(param.numCores) 121 | 122 | val x = rddSet foreachPartitionAsync part 123 | batchFlag += x 124 | 125 | x.onComplete { 126 | _ ⇒ rddSet.unpersist() 127 | } 128 | x 129 | } else { 130 | val x = trainingSet foreachPartitionAsync part 131 | batchFlag += x 132 | 133 | x 134 | } 135 | 136 | try { 137 | Await.ready(x, param.submitInterval) 138 | } catch { 139 | case _: Throwable ⇒ 140 | } 141 | } 142 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/ManipulationType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn.{Objective, Scalar, ScalarMatrix} 4 | import kr.ac.kaist.ir.deep.network.Network 5 | 6 | /** 7 | * __Trait__ that describes how to convert input into corrupted matrix 8 | * 9 | * Input operation corrupts the given input, and apply network propagations onto matrix representation of input 10 | * 11 | * @tparam IN the type of input 12 | * @tparam OUT the type of output 13 | */ 14 | trait ManipulationType[IN, OUT] extends Serializable { 15 | /** Corruption function */ 16 | val corrupt: Corruption 17 | /** Objective function */ 18 | val error: Objective 19 | 20 | // We didn't assign a "network" value, because of dist-belief training style. 
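// Illustration (not in the original source): a concrete ManipulationType typically corrupts the
// input via `corrupt`, runs `net passedBy in`, takes `error.derivative(expected, output)` and
// feeds it back with `net updateBy (delta.toIterator, err)` — see AEType.roundTrip above for
// the canonical pattern of this contract.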
21 | 22 | /** 23 | * Corrupt input 24 | * 25 | * @param x input to be corrupted 26 | * @return corrupted input 27 | */ 28 | def corrupted(x: IN): IN 29 | 30 | /** 31 | * Apply & Back-prop given single input 32 | * 33 | * @param net A network that gets input 34 | * @param delta Sequence of delta updates 35 | */ 36 | def roundTrip(net: Network, delta: Seq[ScalarMatrix]): (IN, OUT) ⇒ Unit 37 | 38 | /** 39 | * Apply given single input as one-way forward trip. 40 | * 41 | * @param net A network that gets input 42 | * @param x input to be computed 43 | * @return output of the network. 44 | */ 45 | def onewayTrip(net: Network, x: IN): ScalarMatrix 46 | 47 | /** 48 | * Make validation output 49 | * 50 | * @param net A network that gets input 51 | * @param in (Input, Real output) pair for computation 52 | * @return input as string 53 | */ 54 | def stringOf(net: Network, in: (IN, OUT)): String 55 | 56 | /** 57 | * Apply given input and compute the error 58 | * 59 | * @param net A network that gets input 60 | * @param pair (Input, Real output) for error computation. 61 | * @return error of this network 62 | */ 63 | def lossOf(net: Network)(pair: (IN, OUT)): Scalar 64 | 65 | /** 66 | * Check whether given two are same or not. 67 | * @param x Out-type object 68 | * @param y Out-type object 69 | * @return True if they are different. 70 | */ 71 | def different(x: OUT, y: OUT): Boolean = true 72 | } 73 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/MultiThreadTrainStyle.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn.{ScalarMatrix, WeightSeqOp, WeightUpdater} 4 | import kr.ac.kaist.ir.deep.network.Network 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.broadcast.Broadcast 7 | import org.apache.spark.rdd.RDD 8 | 9 | import scala.concurrent.ExecutionContext.Implicits.global 10 | import scala.concurrent._ 11 | import scala.concurrent.duration._ 12 | import scala.reflect.ClassTag 13 | 14 | /** 15 | * __Trainer__ : Stochastic-Style, Multi-Threaded using Spark. 16 | * 17 | * @note This is not a implementation using DistBelief Paper. 18 | * This is between [[DistBeliefTrainStyle]](DBTS) and [[SingleThreadTrainStyle]](STTS). 19 | * The major difference is whether "updating" is asynchronous(DBTS) or not(MTTS). 20 | * 21 | * @param net __Network__ to be trained 22 | * @param algorithm Weight __update algorithm__ to be applied 23 | * @param make __Input Operation__ that supervises how to manipulate input as matrices. 24 | * This also controls how to compute actual network. 
(default: [[VectorType]]) 25 | * @param param __Training criteria__ (default: [[SimpleTrainingCriteria]]) 26 | */ 27 | class MultiThreadTrainStyle[IN: ClassTag, OUT: ClassTag](override val net: Network, 28 | override val algorithm: WeightUpdater, 29 | @transient val sc: SparkContext, 30 | override val make: ManipulationType[IN, OUT] = new VectorType(), 31 | override val param: DistBeliefCriteria = DistBeliefCriteria()) 32 | extends TrainStyle[IN, OUT] { 33 | /** Accumulator variable for networks */ 34 | protected val accNet = sc.accumulator(WeightAccumulator.zero(net.W).reverse)(WeightAccumulator) 35 | protected val weightSizes = sc.broadcast(net.W.map(m ⇒ m.rows → m.cols).reverse) 36 | /** Accumulator variable for counter */ 37 | protected val accCount = sc.accumulator(0) 38 | /** Training set */ 39 | protected var trainingSet: RDD[Pair] = null 40 | /** Test Set */ 41 | protected var testSet: RDD[Pair] = null 42 | 43 | /** 44 | * Unpersist all 45 | */ 46 | def unpersist(blocking: Boolean = false): Unit = { 47 | if (trainingSet != null) 48 | trainingSet.unpersist(blocking = blocking) 49 | if (testSet != null) 50 | testSet.unpersist(blocking = blocking) 51 | weightSizes.unpersist(blocking = false) 52 | } 53 | 54 | /** 55 | * Fetch weights 56 | * 57 | * @param iter current iteration 58 | */ 59 | override def fetch(iter: Int): Unit = { 60 | accNet.value.par.map(_ := 0f) 61 | accCount.setValue(0) 62 | } 63 | 64 | /** 65 | * Send update of weights 66 | * 67 | * @param iter current iteration 68 | */ 69 | override def update(iter: Int): Unit = { 70 | val dWUpdate = accNet.value.reverse 71 | val cnt = accCount.value.toFloat 72 | if (cnt > 0) { 73 | dWUpdate :/= cnt 74 | net.W -= dWUpdate 75 | } else { 76 | logger.warn(s"Epoch $iter trained with 0 instances. 
Please check.") 77 | } 78 | } 79 | 80 | /** 81 | * Do mini-batch 82 | */ 83 | override def batch(): Unit = { 84 | val bcNet = sc.broadcast(net) 85 | val part = partFunction(bcNet) 86 | if (param.miniBatchFraction > 0) { 87 | val set = trainingSet.sample(withReplacement = true, fraction = param.miniBatchFraction) 88 | set.foreachPartition(part) 89 | set.unpersist(blocking = false) 90 | } else { 91 | trainingSet.foreachPartition(part) 92 | } 93 | bcNet.unpersist(blocking = false) 94 | } 95 | 96 | protected final def partFunction(net: Broadcast[Network]) = { 97 | 98 | (part: Iterator[(IN, OUT)]) ⇒ { 99 | var count = 0 100 | val f = future { 101 | lazy val dW = weightSizes.value.map(ScalarMatrix.$0) 102 | lazy val trip = make.roundTrip(net.value, dW) 103 | 104 | part.foreach { 105 | case (x, y) ⇒ 106 | count += 1 107 | trip(x, y) 108 | } 109 | 110 | accCount += count 111 | accNet += dW 112 | } 113 | 114 | AsyncAwait.ready(f, 1.second) 115 | } 116 | } 117 | 118 | /** 119 | * Set training instances 120 | * @param set Sequence of training set 121 | */ 122 | override def setPositiveTrainingReference(set: Seq[(IN, OUT)]): Unit = { 123 | val rdd = 124 | if (param.repartitionOnStart) sc.parallelize(set, param.numCores) 125 | else sc.parallelize(set) 126 | trainingSet = rdd.setName("Positives").persist(param.storageLevel) 127 | validationEpoch = if (param.miniBatchFraction > 0) Math.round(1.0f / param.miniBatchFraction) else 1 128 | } 129 | 130 | /** 131 | * Set training instances 132 | * @param set RDD of training set 133 | */ 134 | override def setPositiveTrainingReference(set: RDD[(IN, OUT)]): Unit = { 135 | val rdd = 136 | if (param.repartitionOnStart) set.repartition(param.numCores).persist(param.storageLevel) 137 | else set 138 | trainingSet = rdd.setName(set.name + " (Positives)") 139 | validationEpoch = if (param.miniBatchFraction > 0) Math.round(1.0f / param.miniBatchFraction) else 1 140 | } 141 | 142 | /** 143 | * Set testing instances 144 | * @param set Sequence of testing set 145 | */ 146 | override def setTestReference(set: Seq[(IN, OUT)]): Unit = { 147 | val rdd = 148 | if (param.repartitionOnStart) sc.parallelize(set, param.numCores) 149 | else sc.parallelize(set) 150 | testSet = rdd.setName("Validation").persist(param.storageLevel) 151 | } 152 | 153 | /** 154 | * Set testing instances 155 | * @param set RDD of testing set 156 | */ 157 | override def setTestReference(set: RDD[(IN, OUT)]): Unit = { 158 | val rdd = 159 | if (param.repartitionOnStart) set.repartition(param.numCores).persist(param.storageLevel) 160 | else set 161 | testSet = rdd.setName(set.name + " (Validation)") 162 | } 163 | 164 | /** 165 | * Iterate over given number of test instances 166 | * @param n number of random sampled instances 167 | * @param fn iteratee function 168 | */ 169 | override def foreachTestSet(n: Int)(fn: ((IN, OUT)) ⇒ Unit): Unit = { 170 | var seq = testSet.takeSample(withReplacement = true, num = n) 171 | while (seq.nonEmpty) { 172 | fn(seq.head) 173 | seq = seq.tail 174 | } 175 | } 176 | 177 | /** 178 | * Calculate validation error 179 | * 180 | * @return validation error 181 | */ 182 | def validationError() = { 183 | val loss = sc.accumulator(0.0f) 184 | val count = sc.accumulator(0) 185 | val lossOf = make.lossOf(net) _ 186 | testSet.foreachPartition { 187 | iter ⇒ 188 | val f = future { 189 | var sum = 0.0f 190 | var c = 0 191 | while (iter.hasNext) { 192 | sum += lossOf(iter.next()) 193 | c += 1 194 | } 195 | loss += sum 196 | count += c 197 | } 198 | 199 | AsyncAwait.ready(f, 1.second) 200 | 
} 201 | 202 | loss.value / count.value.toFloat 203 | } 204 | } -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/RAEType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.network.Network 5 | import kr.ac.kaist.ir.deep.rec.BinaryTree 6 | 7 | /** 8 | * __Input Operation__ : VectorTree as Input & Recursive Auto-Encoder Training (no output type) 9 | * 10 | * @note We recommend that you should not apply this method to non-AutoEncoder tasks 11 | * @note This implementation designed as a replica of the traditional RAE in 12 | * [[http://ai.stanford.edu/~ang/papers/emnlp11-RecursiveAutoencodersSentimentDistributions.pdf this paper]] 13 | * 14 | * @param corrupt Corruption that supervises how to corrupt the input matrix. `(Default : [[kr.ac.kaist.ir.deep.train.NoCorruption]])` 15 | * @param error An objective function `(Default: [[kr.ac.kaist.ir.deep.fn.SquaredErr]])` 16 | * 17 | * @example 18 | * {{{var make = new RAEType(error = CrossEntropyErr) 19 | * var corruptedIn = make corrupted in 20 | * var out = make onewayTrip (net, corruptedIn)}}} 21 | */ 22 | class RAEType(override val corrupt: Corruption = NoCorruption, 23 | override val error: Objective = SquaredErr) 24 | extends TreeType { 25 | 26 | /** 27 | * Apply & Back-prop given single input 28 | * 29 | * @param net A network that gets input 30 | * @param delta Sequence of delta updates 31 | */ 32 | def roundTrip(net: Network, delta: Seq[ScalarMatrix]) = (in: BinaryTree, real: Null) ⇒ { 33 | in forward { 34 | x ⇒ 35 | val err = error.derivative(x, net passedBy x) 36 | net updateBy(delta.toIterator, err) 37 | // propagate hidden-layer value 38 | net(x) 39 | } 40 | } 41 | 42 | /** 43 | * Apply given input and compute the error 44 | * 45 | * @param net A network that gets input 46 | * @param pair (Input, Real output) for error computation. 47 | * @return error of this network 48 | */ 49 | def lossOf(net: Network)(pair: (BinaryTree, Null)): Scalar = { 50 | var sum = 0.0f 51 | val in = pair._1 52 | in forward { 53 | x ⇒ 54 | sum += error(x, net of x) 55 | //propagate hidden-layer value 56 | net(x) 57 | } 58 | sum 59 | } 60 | 61 | /** 62 | * Make validation output 63 | * 64 | * @return input as string 65 | */ 66 | def stringOf(net: Network, pair: (BinaryTree, Null)): String = { 67 | val string = StringBuilder.newBuilder 68 | pair._1 forward { 69 | x ⇒ 70 | val out = net of x 71 | val hid = net(x) 72 | string append s"IN: ${x.mkString} RAE → OUT: ${out.mkString}, HDN: ${hid.mkString}; " 73 | // propagate hidden-layer value 74 | hid 75 | } 76 | string.mkString 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/RandomEqualPartitioner.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import org.apache.spark.Partitioner 4 | 5 | /** 6 | * Spark Partitioner that gives almost-equal partitions. 
7 | * 8 | * @note Use this with RDD.zipWithUniqueId() 9 | * 10 | * @param numPartition Number of partitions 11 | */ 12 | class RandomEqualPartitioner(val numPartition: Int) extends Partitioner { 13 | private var nextNumber = 0 14 | 15 | def refreshRandom() = { 16 | nextNumber += 1 17 | } 18 | 19 | override def numPartitions: Int = numPartition 20 | 21 | override def getPartition(key: Any): Int = { 22 | val i = key.asInstanceOf[Long] + nextNumber 23 | val remain = i % numPartition 24 | val quotient = ((i / numPartition) * nextNumber) % numPartition 25 | val hash = ((remain + quotient) % numPartition).asInstanceOf[Int] 26 | if (hash < 0) 27 | hash + numPartition 28 | else 29 | hash 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/SingleThreadTrainStyle.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import java.util.concurrent.ThreadLocalRandom 4 | 5 | import kr.ac.kaist.ir.deep.fn.{Scalar, WeightSeqOp, WeightUpdater} 6 | import kr.ac.kaist.ir.deep.network.Network 7 | import org.apache.spark.SparkContext 8 | import org.apache.spark.rdd.RDD 9 | 10 | /** 11 | * __Trainer__ : Stochastic-Style, Single-Threaded 12 | * 13 | * @param net __Network__ to be trained 14 | * @param algorithm Weight __update algorithm__ to be applied 15 | * @param make __Input Operation__ that supervises how to manipulate input as matrices. 16 | * This also controls how to compute actual network. (default: [[VectorType]]) 17 | * @param param __Training criteria__ (default: [[SimpleTrainingCriteria]]) 18 | */ 19 | class SingleThreadTrainStyle[IN, OUT](override val net: Network, 20 | override val algorithm: WeightUpdater, 21 | override val make: ManipulationType[IN, OUT] = new VectorType(), 22 | override val param: TrainingCriteria = SimpleTrainingCriteria()) 23 | extends TrainStyle[IN, OUT] { 24 | 25 | /** dWeight */ 26 | private val dW = WeightAccumulator.zero(net.W).reverse 27 | /** Training set */ 28 | private var trainingSet: Scalar ⇒ Seq[Pair] = null 29 | /** Test Set */ 30 | private var testSet: Int ⇒ Seq[Pair] = null 31 | /** Test Set iterator */ 32 | private var testSetMapper: (Pair ⇒ Unit) ⇒ Unit = null 33 | /** Test Set Context. 
Null if testset is a local seq */ 34 | private var testSetSC: SparkContext = null 35 | /** Count */ 36 | private var count = 0 37 | 38 | /** 39 | * Fetch weights 40 | * 41 | * @param iter current iteration 42 | */ 43 | override def fetch(iter: Int): Unit = {} 44 | 45 | /** 46 | * Send update of weights 47 | * 48 | * @param iter current iteration 49 | */ 50 | override def update(iter: Int): Unit = { 51 | dW :/= count.toFloat 52 | net.W -= dW.reverse 53 | count = 0 54 | } 55 | 56 | /** 57 | * Do mini-batch 58 | */ 59 | override def batch(): Unit = { 60 | val seq = trainingSet(param.miniBatchFraction) 61 | val trip = make.roundTrip(net, dW) 62 | seq.foreach { 63 | case (x, y) ⇒ 64 | count += 1 65 | trip(x, y) 66 | } 67 | } 68 | 69 | /** 70 | * Set training instances 71 | * @param set Sequence of training set 72 | */ 73 | override def setPositiveTrainingReference(set: Seq[(IN, OUT)]): Unit = { 74 | trainingSet = (x: Scalar) ⇒ 75 | if (x > 0) { 76 | set.filter(_ ⇒ ThreadLocalRandom.current().nextFloat() < x) 77 | } else { 78 | set 79 | } 80 | validationEpoch = if (param.miniBatchFraction > 0) Math.round(1.0f / param.miniBatchFraction) else 1 81 | } 82 | 83 | /** 84 | * Set training instances 85 | * @param set RDD of training set 86 | */ 87 | override def setPositiveTrainingReference(set: RDD[(IN, OUT)]): Unit = { 88 | trainingSet = (x: Scalar) ⇒ 89 | if (x > 0) set.sample(withReplacement = true, fraction = x).collect().toSeq 90 | else set.collect() 91 | validationEpoch = if (param.miniBatchFraction > 0) Math.round(1.0f / param.miniBatchFraction) else 1 92 | } 93 | 94 | /** 95 | * Set testing instances 96 | * @param set Sequence of testing set 97 | */ 98 | override def setTestReference(set: Seq[(IN, OUT)]): Unit = { 99 | testSet = set.take 100 | testSetMapper = (mapper: Pair ⇒ Unit) ⇒ { 101 | var seq = set 102 | while (seq.nonEmpty) { 103 | mapper(seq.head) 104 | seq = seq.tail 105 | } 106 | } 107 | testSetSC = null 108 | } 109 | 110 | /** 111 | * Set testing instances 112 | * @param set RDD of testing set 113 | */ 114 | override def setTestReference(set: RDD[(IN, OUT)]): Unit = { 115 | testSet = (n: Int) ⇒ set.takeSample(withReplacement = true, num = n).toSeq 116 | testSetMapper = (mapper: Pair ⇒ Unit) ⇒ { 117 | set.foreach(mapper) 118 | } 119 | testSetSC = set.context 120 | } 121 | 122 | /** 123 | * Calculate validation error 124 | * 125 | * @return validation error 126 | */ 127 | def validationError() = { 128 | val lossOf = make.lossOf(net) _ 129 | 130 | if (testSetSC == null) { 131 | // If it is from general "local" sequence 132 | var sum = 0.0f 133 | var count = 0 134 | testSetMapper { 135 | item ⇒ 136 | sum += lossOf(item) 137 | count += 1 138 | } 139 | sum / count.toFloat 140 | } else { 141 | // If it is from RDD 142 | val sum = testSetSC.accumulator(0.0f) 143 | val count = testSetSC.accumulator(0) 144 | val bcLoss = testSetSC.broadcast(lossOf) 145 | testSetMapper { 146 | item ⇒ 147 | sum += bcLoss.value(item) 148 | count += 1 149 | } 150 | bcLoss.destroy() 151 | sum.value / count.value.toFloat 152 | } 153 | } 154 | 155 | /** 156 | * Iterate over given number of test instances 157 | * @param n number of random sampled instances 158 | * @param fn iteratee function 159 | */ 160 | override def foreachTestSet(n: Int)(fn: ((IN, OUT)) ⇒ Unit): Unit = { 161 | var set = testSet(n) 162 | while (set.nonEmpty) { 163 | fn(set.head) 164 | set = set.tail 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- 
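The `Network` trait shown earlier also provides JSON persistence: `saveAsJsonFile` writes the pretty-printed form of `toJSON`, and the companion's `jsonFile`/`revive` rebuild a network from the stored `"type"` (and optional `"reviver"`) fields. A minimal round-trip sketch — the file name, activation and layer sizes below are illustrative, not taken from the source:

```scala
import kr.ac.kaist.ir.deep.fn._
import kr.ac.kaist.ir.deep.network._
import scala.reflect.io.Path

// Build a small fully-connected network: 3 -> 5 -> 2 with tanh units.
val net: Network = Network(HyperbolicTangent, 3, 5, 2)

// Persist it as pretty-printed JSON (UTF-8 by default).
val path: Path = Path("net.json")
net.saveAsJsonFile(path)

// Later, restore it; the stored "type" field selects the reviver
// (BasicNetwork, AutoEncoder or StackedAutoEncoder).
val restored: Network = Network.jsonFile[Network](path)
```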
/src/main/scala/kr/ac/kaist/ir/deep/train/StandardRAEType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.layer.NormalizeOperation 5 | import kr.ac.kaist.ir.deep.network.Network 6 | import kr.ac.kaist.ir.deep.rec.BinaryTree 7 | 8 | /** 9 | * __Input Operation__ : VectorTree as Input & Recursive Auto-Encoder Training (no output type) 10 | * 11 | * @note We recommend that you should not apply this method to non-AutoEncoder tasks 12 | * @note This implementation designed as a replica of the standard RAE (RAE + normalization) in 13 | * [[http://ai.stanford.edu/~ang/papers/emnlp11-RecursiveAutoencodersSentimentDistributions.pdf this paper]] 14 | * 15 | * @param corrupt Corruption that supervises how to corrupt the input matrix. `(Default : [[kr.ac.kaist.ir.deep.train.NoCorruption]])` 16 | * @param error An objective function `(Default: [[kr.ac.kaist.ir.deep.fn.SquaredErr]])` 17 | * 18 | * @example 19 | * {{{var make = new RAEType(error = CrossEntropyErr) 20 | * var corruptedIn = make corrupted in 21 | * var out = make onewayTrip (net, corruptedIn)}}} 22 | */ 23 | class StandardRAEType(override val corrupt: Corruption = NoCorruption, 24 | override val error: Objective = SquaredErr) 25 | extends TreeType { 26 | /** Normalization layer */ 27 | val normalizeLayer = new NormalizeOperation() 28 | 29 | /** 30 | * Apply & Back-prop given single input 31 | * 32 | * @param net A network that gets input 33 | * @param delta Sequence of delta updates 34 | */ 35 | def roundTrip(net: Network, delta: Seq[ScalarMatrix]) = (in: BinaryTree, real: Null) ⇒ { 36 | in forward { 37 | x ⇒ 38 | val out = net passedBy x 39 | val zOut = normalizeLayer passedBy out 40 | val dit = delta.toIterator 41 | 42 | // un-normalize the error 43 | val normalErr = error.derivative(x, zOut) 44 | val err = normalizeLayer updateBy(dit, normalErr) 45 | 46 | net updateBy(dit, err) 47 | 48 | // propagate hidden-layer value 49 | net(x) 50 | } 51 | } 52 | 53 | /** 54 | * Apply given input and compute the error 55 | * 56 | * @param net A network that gets input 57 | * @param pair (Input, Real output) for error computation. 
58 | * @return error of this network 59 | */ 60 | def lossOf(net: Network)(pair: (BinaryTree, Null)): Scalar = { 61 | var total = 0.0f 62 | val in = pair._1 63 | in forward { 64 | x ⇒ 65 | val out = net of x 66 | val normalized = normalizeLayer(out) 67 | total += error(x, normalized) 68 | //propagate hidden-layer value 69 | net(x) 70 | } 71 | total 72 | } 73 | 74 | /** 75 | * Make validation output 76 | * 77 | * @return input as string 78 | */ 79 | def stringOf(net: Network, pair: (BinaryTree, Null)): String = { 80 | val string = StringBuilder.newBuilder 81 | pair._1 forward { 82 | x ⇒ 83 | val out = net of x 84 | val normalized = normalizeLayer(out) 85 | val hid = net(x) 86 | string append s"IN: ${x.mkString} RAE → OUT: ${normalized.mkString}, HDN: ${hid.mkString}; " 87 | // propagate hidden-layer value 88 | hid 89 | } 90 | string.mkString 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/TrainStyle.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.network.Network 5 | import org.apache.log4j.{Level, Logger} 6 | import org.apache.spark.rdd.RDD 7 | 8 | import scala.concurrent.Future 9 | 10 | /** 11 | * __Trait__ that describes style of training 12 | * 13 | * This trait controls how to train, i.e. __Single-threaded__ or __Distributed__. 14 | * 15 | * @tparam IN the type of input 16 | * @tparam OUT the type of output 17 | */ 18 | trait TrainStyle[IN, OUT] extends Serializable { 19 | // Turnoff spark logging feature. 20 | Logger.getRootLogger.setLevel(Level.WARN) 21 | Logger.getLogger("kr.ac").setLevel(Level.INFO) 22 | 23 | /** Training Pair Type */ 24 | type Pair = (IN, OUT) 25 | /** Sampler Type */ 26 | type Sampler = Int ⇒ Seq[OUT] 27 | /** Training parameters */ 28 | val param: TrainingCriteria 29 | /** Network */ 30 | val net: Network 31 | /** Algorithm */ 32 | val algorithm: WeightUpdater 33 | /** Set of input manipulations */ 34 | val make: ManipulationType[IN, OUT] 35 | /** Logger */ 36 | @transient protected val logger = Logger.getLogger(this.getClass) 37 | /** number of epochs for iterating one training set */ 38 | var validationEpoch: Int = 0 39 | 40 | /** 41 | * Calculate validation error 42 | * 43 | * @return validation error 44 | */ 45 | def validationError(): Scalar 46 | 47 | /** 48 | * Iterate over given number of test instances 49 | * @param n number of random sampled instances 50 | * @param fn iteratee function 51 | */ 52 | def foreachTestSet(n: Int)(fn: Pair ⇒ Unit): Unit 53 | 54 | /** 55 | * Set training instances 56 | * @param set Sequence of training set 57 | */ 58 | def setPositiveTrainingReference(set: Seq[Pair]): Unit 59 | 60 | /** 61 | * Set training instances 62 | * @param set RDD of training set 63 | */ 64 | def setPositiveTrainingReference(set: RDD[Pair]): Unit 65 | 66 | /** 67 | * Set testing instances 68 | * @param set Sequence of testing set 69 | */ 70 | def setTestReference(set: Seq[Pair]): Unit 71 | 72 | /** 73 | * Set testing instances 74 | * @param set RDD of testing set 75 | */ 76 | def setTestReference(set: RDD[Pair]): Unit 77 | 78 | /** 79 | * Fetch weights 80 | * 81 | * @param iter current iteration 82 | */ 83 | def fetch(iter: Int): Unit 84 | 85 | /** 86 | * Do mini-batch 87 | */ 88 | def batch(): Unit 89 | 90 | /** 91 | * Send update of weights 92 | * 93 | * @param iter current iteration 94 | */ 95 | def update(iter: 
Int): Unit 96 | 97 | /** 98 | * Indicates whether the asynchronous update is finished or not. 99 | * 100 | * @return future object of update 101 | */ 102 | def isUpdateFinished: Future[_] = null 103 | 104 | /** 105 | * Non-blocking pending, until all assigned batches are finished 106 | */ 107 | def stopUntilBatchFinished(): Unit = {} 108 | 109 | /** 110 | * Implicit weight operation 111 | * 112 | * @param w Sequence of weight to be applied 113 | */ 114 | implicit class WeightOp(w: IndexedSeq[ScalarMatrix]) extends Serializable { 115 | /** 116 | * Sugar: Weight update 117 | * 118 | * @param dw A amount of update i.e. __ΔWeight__ 119 | */ 120 | def -=(dw: IndexedSeq[ScalarMatrix]) = algorithm(dw, w) 121 | } 122 | 123 | } 124 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/Trainer.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import java.text.SimpleDateFormat 4 | import java.util.Date 5 | 6 | import kr.ac.kaist.ir.deep.fn._ 7 | import org.apache.log4j.Logger 8 | import org.apache.spark.rdd.RDD 9 | 10 | import scala.annotation.tailrec 11 | import scala.concurrent.Await 12 | import scala.concurrent.duration._ 13 | 14 | 15 | /** 16 | * __General__ Trainer Implementation. 17 | * 18 | * This class trains with help of Training Style and Input Operation. 19 | * 20 | * @note This trainer is generalized class. Further implementation, you should see several styles. 21 | * @example 22 | * {{{val net:Network = ... 23 | * 24 | * // Define Manipulation Type. VectorType, AEType, RAEType and URAEType. 25 | * val operation = new VectorType( 26 | * corrupt = GaussianCorruption(variance = 0.1) 27 | * ) 28 | * 29 | * // Define Manipulation Type. VectorType, AEType, RAEType, StandardRAEType, URAEType, and StringToVectorType. 30 | * val style = new SingleThreadTrainStyle( 31 | * net = net, 32 | * algorithm = new StochasticGradientDescent(l2decay = 0.0001), 33 | * make = operation, 34 | * param = SimpleTrainingCriteria(miniBatchFraction = 0.01)) 35 | * 36 | * // Define Trainer 37 | * val train = new Trainer( 38 | * style = style, 39 | * stops = StoppingCriteria(maxIter = 100000)) 40 | * 41 | * // Do Train 42 | * train.train(set, valid)}}} 43 | * 44 | * @note To train an autoencoder, you can provide same training set as validation set. 45 | * 46 | * @param style __Training style__ that supervises how to train. There are two styles, 47 | * one is [[SingleThreadTrainStyle]] 48 | * and the other is [[DistBeliefTrainStyle]]. 49 | * @param stops __Stopping Criteria__ that controls the threshold for stopping. (Default : [[StoppingCriteria]]) 50 | * @param name Name used for logging. 51 | * 52 | * @tparam IN the type of input. 
53 | * Currently, [[kr.ac.kaist.ir.deep.fn.ScalarMatrix]] and DAG are supported 54 | * @tparam OUT the type of output 55 | * Currently, [[kr.ac.kaist.ir.deep.fn.ScalarMatrix]] and Null are supported 56 | */ 57 | class Trainer[IN, OUT](val style: TrainStyle[IN, OUT], 58 | val stops: StoppingCriteria = StoppingCriteria(), 59 | val name: String = "Trainer") 60 | extends Serializable { 61 | /** import everything in the style */ 62 | 63 | import style._ 64 | 65 | @transient private final val dateFormatter = new SimpleDateFormat("MM/dd HH:mm:ss") 66 | /** Logger */ 67 | @transient protected val logger = Logger.getLogger(this.getClass) 68 | /** Best Parameter History */ 69 | @transient protected var bestParam: IndexedSeq[ScalarMatrix] = null 70 | /** Best Loss Iteration Number */ 71 | @transient protected var bestIter: Int = 0 72 | /** Period of validation */ 73 | @transient protected var validationPeriod: Int = 0 74 | /** Get command line column width */ 75 | @transient protected var columns = try { 76 | System.getenv("COLUMNS").toInt 77 | } catch { 78 | case _: Throwable ⇒ 80 79 | } 80 | /** Finish time of last iteration */ 81 | @transient protected var startAt: Long = _ 82 | 83 | /** 84 | * Train given sequence, and validate with given sequence. 85 | * 86 | * @param set Full Sequence of training set 87 | * @return Training error (loss) 88 | */ 89 | def train(set: Seq[Pair]): (Scalar, Scalar, Scalar) = train(set, set) 90 | 91 | /** 92 | * Train given sequence, and validate with another sequence. 93 | * 94 | * @param set Full Sequence of training set 95 | * @param validation Full Sequence of validation set 96 | * @return Training error (loss) 97 | */ 98 | def train(set: Seq[Pair], 99 | validation: Seq[Pair]): (Scalar, Scalar, Scalar) = { 100 | setPositiveTrainingReference(set) 101 | setTestReference(validation) 102 | 103 | validationPeriod = (stops.validationFreq * validationEpoch).toInt 104 | 105 | if (validationPeriod > 0) { 106 | logger info f"($name) Starts training. " 107 | logger info f"($name) Every $validationPeriod%5d (${stops.validationFreq * 100}%6.2f%% of TrainingSet), " + 108 | f"validation process will be submitted." 109 | 110 | saveParams() 111 | val err = lossOfTraining 112 | restoreParams() 113 | printValidation() 114 | 115 | err 116 | } else { 117 | logger warn f"($name) Validation Period is zero! Training stopped." 118 | logger warn f"($name) Maybe because miniBatchFraction value is too large. Please check." 119 | (Float.PositiveInfinity, Float.PositiveInfinity, Float.PositiveInfinity) 120 | } 121 | } 122 | 123 | /** 124 | * Train using given RDD sequence. 125 | * 126 | * @param set RDD of training set 127 | */ 128 | def train(set: RDD[Pair]): (Scalar, Scalar, Scalar) = train(set, set) 129 | 130 | /** 131 | * Train using given RDD sequence. 132 | * 133 | * @param set RDD of training set 134 | * @param validation RDD of validation set 135 | */ 136 | def train(set: RDD[Pair], validation: RDD[Pair]): (Scalar, Scalar, Scalar) = { 137 | setPositiveTrainingReference(set) 138 | setTestReference(validation) 139 | 140 | validationPeriod = (stops.validationFreq * validationEpoch).toInt 141 | 142 | if (validationPeriod > 0) { 143 | logger info f"($name) Starts training. " 144 | logger info f"($name) Every $validationPeriod%5d (${stops.validationFreq * 100}%6.2f%% of TrainingSet), " + 145 | f"validation process will be submitted." 
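// Snapshot the current weights, run the training loop, then restore the best
// parameters found during validation before printing sample outputs.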
146 | 147 | saveParams() 148 | val err = lossOfTraining 149 | restoreParams() 150 | printValidation() 151 | 152 | err 153 | } else { 154 | logger warn f"($name) Validation Period is zero! Training stopped." 155 | logger warn f"($name) Maybe because miniBatchFraction value is too large. Please check." 156 | (Float.PositiveInfinity, Float.PositiveInfinity, Float.PositiveInfinity) 157 | } 158 | } 159 | 160 | /** 161 | * Print validation result into logger 162 | */ 163 | protected def printValidation() = { 164 | logger info s"($name) BEST ITERATION : $bestIter" 165 | foreachTestSet(5) { 166 | item ⇒ logger info make.stringOf(net, item) 167 | } 168 | } 169 | 170 | /** 171 | * Store best parameters 172 | * 173 | * @param iteration current iteration. (1 iteration = 1 validation freq) 174 | * @param loss previous loss 175 | * @param patience current patience, i.e. loop until at least this epoch. 176 | */ 177 | protected final def saveParams(iteration: Int = 0, 178 | loss: Scalar = Float.MaxValue, 179 | patience: Int = validationPeriod * 5) = { 180 | bestParam = net.W.copy 181 | bestIter = iteration 182 | } 183 | 184 | /** 185 | * Restore best parameters 186 | */ 187 | protected final def restoreParams() = { 188 | // Wait for finish of update, to prohibit race condition. 189 | if (isUpdateFinished != null) { 190 | try { 191 | Await.ready(isUpdateFinished, 5.minutes) 192 | } catch { 193 | case _: Throwable ⇒ 194 | } 195 | } 196 | 197 | net.W := bestParam 198 | } 199 | 200 | /** 201 | * Tail Recursive : Train each batch 202 | * 203 | * @param epoch current iteration epoch. (1 iteration = 1 validation freq) 204 | * @param prevEloss previous loss (Evaluation) 205 | * @param prevWloss previous loss (Weight) 206 | * @param patience current patience, i.e. loop until at least this epoch. 
207 | * @return (Evaluation, Weight, Total) Loss when train is finished 208 | */ 209 | @tailrec 210 | protected final def trainBatch(epoch: Int = 0, 211 | prevEloss: Scalar = Float.MaxValue, 212 | prevWloss: Scalar = Float.MaxValue, 213 | patience: Int = 5): (Scalar, Scalar, Scalar) = { 214 | fetch(epoch) 215 | batch() 216 | update(epoch) 217 | 218 | var nPatience = patience 219 | val iter = epoch / validationPeriod + 1 220 | 221 | val prevloss = prevEloss + prevWloss 222 | val nLoss = if ((epoch + 1) % validationPeriod == 0) { 223 | // Pending until batch finished 224 | stopUntilBatchFinished() 225 | 226 | val train = validationError() 227 | val weight = algorithm loss net.W 228 | val loss = train + weight 229 | val improvement = if (prevloss > 0f) loss / prevloss else stops.improveThreshold 230 | if (improvement < stops.improveThreshold) { 231 | nPatience = Math.min(Math.max(patience, iter * (stops.waitAfterUpdate + 1)), stops.maxIter) 232 | saveParams(iter, loss, nPatience) 233 | 234 | printProgress(iter, nPatience, train, weight, improved = true) 235 | (train, weight, loss) 236 | } else { 237 | printProgress(iter, nPatience, prevEloss, prevWloss, improved = false) 238 | (prevEloss, prevWloss, prevloss) 239 | } 240 | } else { 241 | (prevEloss, prevWloss, prevloss) 242 | } 243 | 244 | if (iter <= nPatience && (nLoss._3 >= stops.lossThreshold || iter < 5)) { 245 | trainBatch(epoch + 1, nLoss._1, nLoss._2, nPatience) 246 | } else { 247 | if (nLoss._3 < stops.lossThreshold) 248 | logger info f"($name) # $iter%4d/$nPatience%4d, " + 249 | f"FINISHED with E + W = ${nLoss._3}%.5f [Loss < ${stops.lossThreshold}%.5f]" 250 | else if (iter > stops.maxIter) 251 | logger info f"($name) # $iter%4d/$nPatience%4d, " + 252 | f"FINISHED with E + W = ${nLoss._3}%.5f [Iteration > ${stops.maxIter}%6d]" 253 | else if (nPatience < iter) 254 | logger info f"($name) # $iter%4d/$nPatience%4d, " + 255 | f"FINISHED with E + W = ${nLoss._3}%.5f [NoUpdate after $bestIter%6d]" 256 | 257 | nLoss 258 | } 259 | } 260 | 261 | private def printProgress(iter: Int, patience: Int, eLoss: Float, wLoss: Float, improved: Boolean) = { 262 | val wait = patience / stops.maxIter.toFloat 263 | val header = f"\033[4m$name\033[24m $iter%4d/$patience%4d \033[0m[" 264 | val impr = if (improved) "IMPROVED" else f" @ $bestIter%4d " 265 | val footer = f" E + W = $eLoss%7.5f + $wLoss%7.5f $impr" 266 | 267 | val buf = new StringBuilder(s"\033[2A\033[${columns}D\033[2K \033[1;33m$header\033[46;36m") 268 | val total = columns - header.length - footer.length + 10 269 | val len = Math.floor(wait * total).toInt 270 | val step = Math.floor(iter / stops.maxIter.toFloat * total).toInt 271 | buf.append(" " * step) 272 | buf.append("\033[49m") 273 | buf.append(" " * (len - step)) 274 | buf.append("\033[0m]\033[34m") 275 | if (total > len) buf.append(s"\033[${total - len}C") 276 | buf.append(s"$footer\033[0m") 277 | 278 | val now = System.currentTimeMillis() 279 | val remainA = (now - startAt) / iter * patience 280 | val etaA = startAt + remainA 281 | val calA = dateFormatter.format(new Date(etaA)) 282 | val remainB = (now - startAt) / iter * stops.maxIter 283 | val etaB = startAt + remainB 284 | val calB = dateFormatter.format(new Date(etaB)) 285 | 286 | buf.append(f"\n\033[2K Estimated Finish Time : $calA \t ~ $calB") 287 | 288 | println(buf.result()) 289 | } 290 | 291 | /** 292 | * Do actual training process 293 | * @return MSE of the training process 294 | */ 295 | private def lossOfTraining: (Scalar, Scalar, Scalar) = 296 | if 
(param.miniBatchFraction > 0) { 297 | println("Start training...\n Estimated Time: NONE") 298 | startAt = System.currentTimeMillis() 299 | trainBatch() 300 | } else { 301 | fetch(0) 302 | batch() 303 | update(0) 304 | 305 | val train = validationError() 306 | val weight = algorithm loss net.W 307 | val loss = train + weight 308 | saveParams(0, loss, 0) 309 | 310 | logger info f"($name) PASSONCE, E + W = $train%.5f + $weight%.5f = $loss%.5f" 311 | (train, weight, loss) 312 | } 313 | 314 | } 315 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/TrainingCriteria.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | /** 4 | * __Trait__ that describes Training Criteria 5 | */ 6 | trait TrainingCriteria extends Serializable { 7 | /** Size of mini-batch. 8 | * If below or equal to zero, then this indicates no batch training (i.e. just go through once.) */ 9 | val miniBatchFraction: Float 10 | } 11 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/TreeType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.network.Network 5 | import kr.ac.kaist.ir.deep.rec._ 6 | 7 | /** 8 | * __Trait of Input Operation__ : VectorTree as Input. This is an '''Abstract Implementation''' 9 | */ 10 | trait TreeType extends ManipulationType[BinaryTree, Null] { 11 | 12 | /** 13 | * Corrupt input 14 | * 15 | * @param x input to be corrupted 16 | * @return corrupted input 17 | */ 18 | override def corrupted(x: BinaryTree): BinaryTree = (x through corrupt).asInstanceOf[BinaryTree] 19 | 20 | /** 21 | * Apply given single input as one-way forward trip. 22 | * 23 | * @param net A network that gets input 24 | * @param x input to be computed 25 | * @return output of the network. 26 | */ 27 | override def onewayTrip(net: Network, x: BinaryTree): ScalarMatrix = 28 | x forward net.of 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/URAEType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.network.{AutoEncoder, Network} 5 | import kr.ac.kaist.ir.deep.rec.BinaryTree 6 | import org.apache.spark.annotation.Experimental 7 | 8 | /** 9 | * __Input Operation__ : VectorTree as Input & Unfolding Recursive Auto Encoder Training (no output type) 10 | * 11 | * ::Experimental:: 12 | * @note This cannot be applied into non-AutoEncoder tasks 13 | * @note This is designed for Unfolding RAE, in 14 | * [[http://ai.stanford.edu/~ang/papers/nips11-DynamicPoolingUnfoldingRecursiveAutoencoders.pdf this paper]] 15 | * 16 | * @param corrupt Corruption that supervises how to corrupt the input matrix. 
`(Default : [[kr.ac.kaist.ir.deep.train.NoCorruption]])` 17 | * @param error An objective function `(Default: [[kr.ac.kaist.ir.deep.fn.SquaredErr]])` 18 | * 19 | * @example 20 | * {{{var make = new URAEType(error = CrossEntropyErr) 21 | * var corruptedIn = make corrupted in 22 | * var out = make onewayTrip (net, corruptedIn)}}} 23 | */ 24 | @Experimental 25 | class URAEType(override val corrupt: Corruption = NoCorruption, 26 | override val error: Objective = SquaredErr) 27 | extends TreeType { 28 | 29 | /** 30 | * Apply & Back-prop given single input 31 | * 32 | * @param net A network that gets input 33 | * @param delta Sequence of delta updates 34 | */ 35 | def roundTrip(net: Network, delta: Seq[ScalarMatrix]) = (in: BinaryTree, real: Null) ⇒ 36 | net match { 37 | case net: AutoEncoder ⇒ 38 | val out = in forward net.encode 39 | 40 | // Decode phrase of reconstruction 41 | var terminals = in.backward(out, net.decode) 42 | while (terminals.nonEmpty) { 43 | val leaf = terminals.head 44 | terminals = terminals.tail 45 | 46 | leaf.out = error.derivative(leaf.out, leaf.x) 47 | } 48 | 49 | // Error propagation for decoder 50 | val err = in forward net.decode_!(delta.take(2).toIterator) 51 | 52 | // Error propagation for encoder 53 | in backward(err, net.encode_!(delta.takeRight(2).toIterator)) 54 | } 55 | 56 | 57 | /** 58 | * Apply given input and compute the error 59 | * 60 | * @param net A network that gets input 61 | * @param pair (Input, Real output) for error computation. 62 | * @return error of this network 63 | */ 64 | def lossOf(net: Network)(pair: (BinaryTree, Null)): Scalar = 65 | net match { 66 | case net: AutoEncoder ⇒ 67 | var sum = 0.0f 68 | val in = pair._1 69 | // Encode phrase of Reconstruction 70 | val out = in forward net.apply 71 | 72 | // Decode phrase of reconstruction 73 | var terminals = in.backward(out, net.reconstruct) 74 | val size = terminals.size 75 | while (terminals.nonEmpty) { 76 | val leaf = terminals.head 77 | terminals = terminals.tail 78 | sum += error(leaf.out, leaf.x) 79 | } 80 | sum 81 | case _ ⇒ 0.0f 82 | } 83 | 84 | 85 | /** 86 | * Make validation output 87 | * 88 | * @return input as string 89 | */ 90 | def stringOf(net: Network, pair: (BinaryTree, Null)): String = 91 | net match { 92 | case net: AutoEncoder ⇒ 93 | val string = StringBuilder.newBuilder 94 | val in = pair._1 95 | // Encode phrase of Reconstruction 96 | val out = in forward net.apply 97 | 98 | // Decode phrase of reconstruction 99 | var terminals = in.backward(out, net.reconstruct) 100 | while (terminals.nonEmpty) { 101 | val leaf = terminals.head 102 | terminals = terminals.tail 103 | 104 | string append s"IN: ${leaf.x.mkString} URAE → OUT: ${leaf.out.mkString};" 105 | } 106 | string.mkString 107 | case _ ⇒ "NOT AN AUTOENCODER" 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/VectorType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.train 2 | 3 | import breeze.linalg.any 4 | import kr.ac.kaist.ir.deep.fn._ 5 | import kr.ac.kaist.ir.deep.network.Network 6 | 7 | /** 8 | * __Input Operation__ : Vector as Input and output 9 | * 10 | * @param corrupt Corruption that supervises how to corrupt the input matrix. 
(Default : [[NoCorruption]]) 11 | * @param error An objective function (Default: [[kr.ac.kaist.ir.deep.fn.SquaredErr]]) 12 | * 13 | * @example 14 | * {{{var make = new VectorType(error = CrossEntropyErr) 15 | * var corruptedIn = make corrupted in 16 | * var out = make onewayTrip (net, corruptedIn)}}} 17 | */ 18 | class VectorType(override val corrupt: Corruption = NoCorruption, 19 | override val error: Objective = SquaredErr) 20 | extends ManipulationType[ScalarMatrix, ScalarMatrix] { 21 | 22 | /** 23 | * Corrupt input 24 | * 25 | * @param x input to be corrupted 26 | * @return corrupted input 27 | */ 28 | override def corrupted(x: ScalarMatrix): ScalarMatrix = corrupt(x) 29 | 30 | /** 31 | * Apply & Back-prop given single input 32 | * 33 | * @param net A network that gets input 34 | * @param delta Sequence of delta updates 35 | */ 36 | def roundTrip(net: Network, delta: Seq[ScalarMatrix]) = (in: ScalarMatrix, real: ScalarMatrix) ⇒ { 37 | val out = net passedBy in 38 | val err: ScalarMatrix = error.derivative(real, out) 39 | net updateBy(delta.toIterator, err) 40 | } 41 | 42 | /** 43 | * Apply given input and compute the error 44 | * 45 | * @param net A network that gets input 46 | * @param pair (Input, Real output) for error computation. 47 | * @return error of this network 48 | */ 49 | override def lossOf(net: Network)(pair: (ScalarMatrix, ScalarMatrix)): Scalar = { 50 | val in = pair._1 51 | val real = pair._2 52 | val out = net of in 53 | error(real, out) 54 | } 55 | 56 | /** 57 | * Apply given single input as one-way forward trip. 58 | * 59 | * @param net A network that gets input 60 | * @param x input to be computed 61 | * @return output of the network. 62 | */ 63 | override def onewayTrip(net: Network, x: ScalarMatrix): ScalarMatrix = net of x 64 | 65 | /** 66 | * Make validation output 67 | * 68 | * @return input as string 69 | */ 70 | def stringOf(net: Network, pair: (ScalarMatrix, ScalarMatrix)): String = { 71 | val in = pair._1 72 | val real = pair._2 73 | val out = net of in 74 | s"IN: ${in.mkString} EXP: ${real.mkString} → OUT: ${out.mkString}" 75 | } 76 | 77 | /** 78 | * Check whether given two are same or not. 79 | * @param x Out-type object 80 | * @param y Out-type object 81 | * @return True if they are different. 82 | */ 83 | override def different(x: ScalarMatrix, y: ScalarMatrix): Boolean = any(x :!= y) 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/train/package.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep 2 | 3 | import breeze.stats.distributions.Gaussian 4 | import kr.ac.kaist.ir.deep.fn._ 5 | import org.apache.spark.AccumulatorParam 6 | import org.apache.spark.storage.StorageLevel 7 | 8 | import scala.annotation.tailrec 9 | import scala.concurrent.duration._ 10 | 11 | /** 12 | * Package for training. 13 | */ 14 | package object train { 15 | 16 | /** Type of Corruption */ 17 | trait Corruption extends (ScalarMatrix ⇒ ScalarMatrix) with Serializable 18 | 19 | /** 20 | * __Input Corruption__: Drop input as zero. 21 | * 22 | * If network uses drop-out training, we recommend that you do not use this. 23 | * 24 | * @note If the presence probability is `P%`, then this corruption leaves `P%` entries of the matrix 25 | * 26 | * @param presence probability of __not-dropped__. 
`(default 95% = 0.95)` 27 | * 28 | * @example 29 | * {{{var corrupt = DroppingCorruption(presence = 0.99) 30 | * var corrupted = corrupt(vector)}}} 31 | */ 32 | case class DroppingCorruption(presence: Float = 0.95f) extends Corruption { 33 | /** 34 | * Do corruption 35 | * 36 | * @param v1 Matrix to be corrupted 37 | * @return corrupted vector 38 | */ 39 | override def apply(v1: ScalarMatrix): ScalarMatrix = 40 | v1 mapValues { x ⇒ if (Math.random() > presence) 0.0f else x} 41 | } 42 | 43 | /** 44 | * __Input Corruption__: Gaussian 45 | * 46 | * @param mean __Mean__ of noise `(default 0.0)` 47 | * @param variance __Variance__ of noise `(default 0.1)` 48 | * 49 | * @example 50 | * {{{var corrupt = GaussianCorruption(variance = 0.1) 51 | * var corrupted = corrupt(vector)}}} 52 | */ 53 | case class GaussianCorruption(mean: Double = 0.0, variance: Double = 0.1) extends Corruption { 54 | /** 55 | * Gaussian Distribution 56 | */ 57 | private lazy val distro = Gaussian distribution(mean, variance) 58 | 59 | /** 60 | * Do corruption 61 | * 62 | * @param v1 Matrix to be corrupted 63 | * @return corrupted vector 64 | */ 65 | override def apply(v1: ScalarMatrix): ScalarMatrix = 66 | v1 mapValues { x ⇒ x + distro.draw().toFloat} 67 | } 68 | 69 | /** 70 | * __Criteria__: When to stop training 71 | * 72 | * This case class defines when to stop training. Training stops if one of the following condition is satisfied. 73 | * 74 | - #Iteration ≥ maxIter 75 | - #Iteration ≥ current patience value, which is calculated by `max(patience, bestIteration * patienceStep)` 76 | - Amount of loss < lossThreshold 77 | * 78 | * Validation is done for each `validationFreq` iterations, 79 | * and whenever current/best loss ratio below improveThreshold, 80 | * that iteration is marked as best iteration. 81 | * 82 | * @param maxIter __maximum mini-batch__ iteration count `(default 100,000)` 83 | * @param waitAfterUpdate __multiplier__ for calculating patience `(default 1 := Wait lastupdate# * 1 after update)` 84 | * @param improveThreshold __threshold__ that iteration is marked as "improved" `(default 99.5% = 0.995)` 85 | * @param lossThreshold __maximum-tolerant__ loss value. `(default 0.0001)` 86 | * @param validationFreq __multiplier__ used for count for validation. `(default 1.0f)` 87 | * Validation checked whenever (validationFreq) * (#epoch for 1 training batch). 88 | * where #epoch for 1 iteration = round(1 / miniBatchFraction). 89 | */ 90 | case class StoppingCriteria(maxIter: Int = 100000, 91 | waitAfterUpdate: Int = 1, 92 | improveThreshold: Float = 0.995f, 93 | lossThreshold: Float = 0.0001f, 94 | validationFreq: Float = 1.0f) 95 | extends Serializable 96 | 97 | /** 98 | * __Criteria__: How to train (for [[SingleThreadTrainStyle]]) 99 | * 100 | * This case class defines how to train the network. Training parameter is defined in this class. 101 | * 102 | * @param miniBatchFraction size of __mini-batch__ `(default 0.01 = 1%)` 103 | * If below or equal to zero, then this indicates no batch training (i.e. just go through once.) 104 | */ 105 | case class SimpleTrainingCriteria(override val miniBatchFraction: Float = 0.01f) extends TrainingCriteria 106 | 107 | /** 108 | * __Criteria__: How to train (for [[DistBeliefTrainStyle]]) 109 | * 110 | * This case class defines how to train the network. Training parameter is defined in this class. 111 | * 112 | * @param miniBatchFraction size of __mini-batch__ `(default 0.01 = 1%)` 113 | * If below or equal to zero, then this indicates no batch training (i.e. 
just go through once.) 114 | * @param submitInterval Time interval between batch submission. `(default 30.seconds)` 115 | * @param updateStep number of __mini-batches__ between update `(default 2)` 116 | * @param fetchStep number of __mini-batches__ between fetching `(default 10)` 117 | * @param numCores number of __v-cores__ in the spark cluster. `(default 1)` 118 | * @param repartitionOnStart true if training/testing RDD instances should be repartitioned when defined. `(default true)` 119 | * @param storageLevel StorageLevel that will be used in Spark. `(default MEMORY_ONLY)` 120 | * 121 | * @note We recommend setting numCores as close as possible to the number of allocated Spark v-cores. 122 | */ 123 | case class DistBeliefCriteria(override val miniBatchFraction: Float = 0.01f, 124 | submitInterval: Duration = 30.seconds, 125 | updateStep: Int = 2, 126 | fetchStep: Int = 10, 127 | numCores: Int = 1, 128 | repartitionOnStart: Boolean = true, 129 | storageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) extends TrainingCriteria 130 | 131 | /** 132 | * Accumulator Param object for DistBelief Train Style. 133 | */ 134 | implicit object WeightAccumulator extends AccumulatorParam[IndexedSeq[ScalarMatrix]] { 135 | /** 136 | * Add in place function 137 | * @param r1 left hand side 138 | * @param r2 right hand side 139 | * @return r1 + r2 in r1 140 | */ 141 | override def addInPlace(r1: IndexedSeq[ScalarMatrix], r2: IndexedSeq[ScalarMatrix]): IndexedSeq[ScalarMatrix] = { 142 | r1 :+= r2 143 | } 144 | 145 | /** 146 | * Zero value 147 | * @param initialValue initial value 148 | * @return initial zero value. 149 | */ 150 | override def zero(initialValue: IndexedSeq[ScalarMatrix]): IndexedSeq[ScalarMatrix] = 151 | initialValue.map { 152 | matx ⇒ 153 | ScalarMatrix $0(matx.rows, matx.cols) 154 | } 155 | } 156 | 157 | /** 158 | * Non-blocking await 159 | */ 160 | object AsyncAwait extends Serializable { 161 | 162 | import scala.concurrent.ExecutionContext.Implicits.global 163 | import scala.concurrent._ 164 | 165 | /** 166 | * Tail-recursive, non-blocking wait on a single future 167 | * @param f Future object to wait 168 | * @param interval Duration object specifying waiting time. 169 | */ 170 | @tailrec 171 | final def ready(f: Future[_], interval: Duration): Unit = try { 172 | Await.ready(f, interval) 173 | } catch { 174 | case _: TimeoutException ⇒ ready(f, interval) 175 | } 176 | 177 | /** 178 | * Non-blocking wait on multiple futures 179 | * @param interval Duration object specifying waiting time. 180 | * @param f Future objects to wait 181 | */ 182 | final def readyAll(interval: Duration, f: Future[Any]*): Unit = 183 | ready(Future.sequence(f.seq), interval) 184 | } 185 | 186 | /** 187 | * __Input Corruption__: Never corrupts input 188 | * 189 | * @example 190 | * {{{var corrupt = NoCorruption 191 | * var corrupted = corrupt(vector)}}} 192 | */ 193 | case object NoCorruption extends Corruption { 194 | 195 | /** 196 | * Identity.
197 | * @param v1 to be corrupted 198 | * @return the vector 199 | */ 200 | override def apply(v1: ScalarMatrix) = v1 201 | } 202 | 203 | } 204 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/wordvec/PrepareCorpus.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.wordvec 2 | 3 | import java.util 4 | 5 | import org.apache.spark.broadcast.Broadcast 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.storage.StorageLevel 8 | import org.apache.spark.{Logging, SparkConf, SparkContext} 9 | import org.apache.log4j._ 10 | 11 | import scala.collection.JavaConversions._ 12 | 13 | /** 14 | * Train Word2Vec and save the model. 15 | */ 16 | object PrepareCorpus extends Logging { 17 | { 18 | // Initialize Network Logging 19 | val PATTERN = "%d{yy/MM/dd HH:mm:ss} %p %C{2}: %m%n" 20 | val orgFile = new RollingFileAppender(new PatternLayout(PATTERN), "spark.log") 21 | orgFile.setMaxFileSize("1MB") 22 | orgFile.setMaxBackupIndex(5) 23 | val root = Logger.getRootLogger 24 | root.addAppender(orgFile) 25 | root.setLevel(Level.WARN) 26 | root.setAdditivity(false) 27 | val krFile = new RollingFileAppender(new PatternLayout(PATTERN), "trainer.log") 28 | krFile.setMaxFileSize("1MB") 29 | krFile.setMaxBackupIndex(10) 30 | val kr = Logger.getLogger("kr.ac") 31 | kr.addAppender(krFile) 32 | kr.setLevel(Level.INFO) 33 | } 34 | 35 | /** 36 | * Main thread. 37 | * @param args CLI arguments 38 | */ 39 | def main(args: Array[String]) = 40 | if (args.length == 0 || args.contains("--help") || args.contains("-h")) { 41 | println( 42 | """Tokenize sentences, and Collect several types of unknown words. 43 | | 44 | |== Arguments without default == 45 | | -i Path of input corpora file. 46 | | -o Path of tokenized output text file. 47 | | 48 | |== Arguments with default == 49 | | --srlz Local Path of Serialized Language Filter file. (Default: filter.dat) 50 | | --thre Minimum include count. (Default: 3) 51 | | --part Number of partitios. (Default: organized by Spark) 52 | | --lang Accepted Language Area of Unicode. (Default: \\\\u0000-\\\\u007f) 53 | | For Korean: 가-힣|\\\\u0000-\\\\u007f 54 | | 55 | |== Additional Arguments == 56 | | --help Display this help message. 
57 | | """.stripMargin) 58 | } else { 59 | // Set spark context 60 | val conf = new SparkConf() 61 | .setAppName("Normalize Infrequent words") 62 | .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 63 | .set("spark.scheduler.mode", "FAIR") 64 | .set("spark.shuffle.memoryFraction", "0.05") 65 | .set("spark.storage.unrollFraction", "0.05") 66 | .set("spark.storage.memoryFraction", "0.9") 67 | .set("spark.broadcast.blockSize", "40960") 68 | .set("spark.akka.frameSize", "50") 69 | .set("spark.locality.wait", "10000") 70 | val sc = new SparkContext(conf) 71 | sc.setLocalProperty("spark.scheduler.pool", "production") 72 | 73 | val langArea = getArgument(args, "--lang", "\\u0000-\\u007f") 74 | val langFilter = LangFilter(langArea) 75 | val bcFilter = sc.broadcast(langFilter) 76 | langFilter.saveAs(getArgument(args, "--srlz", "filter.dat")) 77 | logInfo(s"Language filter created : $langArea") 78 | 79 | // read file 80 | val in = getArgument(args, "-i", "article.txt") 81 | val parts = getArgument(args, "--part", "1").toInt 82 | val lines = sc.textFile(in, parts).filter(_.trim.nonEmpty) 83 | val tokens = tokenize(lines, bcFilter) 84 | 85 | val threshold = getArgument(args, "--thre", "3").toInt 86 | val infreqWords = infrequentWords(tokens.flatMap(x ⇒ x), threshold) 87 | val infreqSet = sc.broadcast(infreqWords) 88 | 89 | val out = getArgument(args, "-o", "article-preproc.txt") 90 | normalizedTokens(tokens, infreqSet).saveAsTextFile(out) 91 | 92 | // Stop the context 93 | sc.stop() 94 | } 95 | 96 | /** 97 | * Read argument 98 | * @param args Argument Array 99 | * @param key Argument Key 100 | * @param default Default value of this argument 101 | * @return Value of this key. 102 | */ 103 | def getArgument(args: Array[String], key: String, default: String) = { 104 | val idx = args.indexOf(key) 105 | if (idx < 0 || idx > args.length - 1) default 106 | else args(idx + 1) 107 | } 108 | 109 | /** 110 | * Collect frequent words with count >= Threshold 111 | * @param words Word seq. 112 | * @return HashSet of frequent words. 113 | */ 114 | def infrequentWords(words: RDD[String], threshold: Int) = { 115 | val counts = words.countByValue() 116 | val above = counts.count(_._2 >= threshold) 117 | val set = counts.filter(_._2 < threshold).keySet 118 | val value = new util.HashSet[String]() 119 | value ++= set 120 | 121 | val all = above + set.size 122 | val ratio = Math.round(set.size.toFloat / all * 100) 123 | logInfo(s"Total $all distinct words, ${set.size} words($ratio%) will be discarded.") 124 | 125 | value 126 | } 127 | 128 | /** 129 | * Convert input into tokenized string, using Stanford NLP toolkit. 130 | * @param lines Input lines 131 | * @return tokenized & normalized lines. 132 | */ 133 | def tokenize(lines: RDD[String], bcFilter: Broadcast[_ <: WordFilter]) = 134 | lines.map(bcFilter.value.tokenize).persist(StorageLevel.DISK_ONLY_2) 135 | 136 | /** 137 | * Convert tokenized string into a sentence, with appropriate conversion of (Threshold - 1) count word. 
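* @example A rough sketch of the expected replacement (hypothetical data; `sc` is an active SparkContext):
* {{{
* val tokens = sc.parallelize(Seq(Seq("the", "quux")))
* val rare   = sc.broadcast(new java.util.HashSet[String](java.util.Arrays.asList("quux")))
* // every word whose count fell below the threshold becomes the ≪UNKNOWN≫ token
* normalizedTokens(tokens, rare).collect()   // Array("the ≪UNKNOWN≫ ")
* }}}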
138 | * @param input Tokenized input sentence 139 | * @param infreqSet Less Frequent words 140 | * @return Tokenized converted sentence 141 | */ 142 | def normalizedTokens(input: RDD[_ <: Seq[String]], infreqSet: Broadcast[util.HashSet[String]]) = 143 | input.mapPartitions { 144 | lazy val set = infreqSet.value 145 | 146 | _.map { 147 | seq ⇒ 148 | val it = seq.iterator 149 | val buf = StringBuilder.newBuilder 150 | 151 | while(it.hasNext){ 152 | val word = it.next() 153 | if (set contains word){ 154 | buf.append(WordModel.OTHER_UNK) 155 | }else{ 156 | buf.append(word) 157 | } 158 | buf.append(' ') 159 | } 160 | 161 | buf.result() 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/wordvec/StringToVectorType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.wordvec 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.network.Network 5 | import org.apache.spark.broadcast.Broadcast 6 | 7 | /** 8 | * __Input Operation__ : String as Input & ScalarMatrix as Otput __(Spark ONLY)__ 9 | * 10 | * @param model Broadcast of WordEmbedding model that contains all meaningful words. 11 | * @param error An objective function `(Default: [[kr.ac.kaist.ir.deep.fn.SquaredErr]])` 12 | * 13 | * @example 14 | * {{{var make = new StringToVectorType(model = wordModel, error = CrossEntropyErr) 15 | * var out = make onewayTrip (net, in)}}} 16 | */ 17 | class StringToVectorType(protected override val model: Broadcast[WordModel], 18 | override val error: Objective) extends StringType[ScalarMatrix] { 19 | /** 20 | * Apply & Back-prop given single input 21 | * 22 | * @param net A network that gets input 23 | * @param delta Sequence of delta updates 24 | */ 25 | def roundTrip(net: Network, delta: Seq[ScalarMatrix]) = (in: String, real: ScalarMatrix) ⇒ { 26 | val out = net.passedBy(model.value(in)) 27 | val err: ScalarMatrix = error.derivative(real, out) 28 | net updateBy(delta.toIterator, err) 29 | } 30 | 31 | /** 32 | * Make validation output 33 | * 34 | * @param net A network that gets input 35 | * @param pair (Input, Real output) pair for computation 36 | * @return input as string 37 | */ 38 | override def stringOf(net: Network, pair: (String, ScalarMatrix)): String = { 39 | val in = pair._1 40 | val real = pair._2 41 | val out = net of model.value(in) 42 | s"IN: $in EXP: ${real.mkString} → OUT: ${out.mkString}" 43 | } 44 | 45 | /** 46 | * Apply given input and compute the error 47 | * 48 | * @param net A network that gets input 49 | * @param pair (Input, Real output) for error computation. 50 | * @return error of this network 51 | */ 52 | override def lossOf(net: Network)(pair: (String, ScalarMatrix)): Scalar = { 53 | val in = pair._1 54 | val real = pair._2 55 | val out = net of model.value(in) 56 | error(real, out) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/wordvec/StringType.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep.wordvec 2 | 3 | import kr.ac.kaist.ir.deep.fn._ 4 | import kr.ac.kaist.ir.deep.network.Network 5 | import kr.ac.kaist.ir.deep.train.{Corruption, ManipulationType} 6 | import org.apache.spark.broadcast.Broadcast 7 | 8 | /** 9 | * __Trait of Input Operation__ : String as Input. This is an '''Abstract Implementation''' 10 | * 11 | * @tparam OUT Output type. 
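* @note Implementations are expected to look the input string up in the broadcast `model`
*       before feeding the network. A minimal sketch with [[StringToVectorType]],
*       assuming a broadcast `wordModel` and a network `net` are already available:
* {{{
* val make = new StringToVectorType(model = wordModel, error = SquaredErr)
* val out  = make onewayTrip (net, "someword")   // same as: net of wordModel.value("someword")
* }}}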
12 | */ 13 | trait StringType[OUT] extends ManipulationType[String, OUT] { 14 | override val corrupt: Corruption = null 15 | protected val model: Broadcast[WordModel] 16 | 17 | /** 18 | * Corrupt input : No corruption for string. 19 | * 20 | * @param x input to be corrupted 21 | * @return corrupted input 22 | */ 23 | override def corrupted(x: String): String = x 24 | 25 | /** 26 | * Apply given single input as one-way forward trip. 27 | * 28 | * @param net A network that gets input 29 | * @param x input to be computed 30 | * @return output of the network. 31 | */ 32 | override def onewayTrip(net: Network, x: String): ScalarMatrix = 33 | net of model.value(x) 34 | } 35 | -------------------------------------------------------------------------------- /src/main/scala/kr/ac/kaist/ir/deep/wordvec/package.scala: -------------------------------------------------------------------------------- 1 | package kr.ac.kaist.ir.deep 2 | 3 | import java.io.{ObjectInputStream, ObjectOutputStream} 4 | import java.util 5 | import java.util.regex.Pattern 6 | 7 | import kr.ac.kaist.ir.deep.fn._ 8 | import org.apache.log4j.Logger 9 | 10 | import scala.collection.JavaConversions._ 11 | import scala.collection.mutable 12 | import scala.io.Codec 13 | import scala.reflect.io.{File, Path} 14 | 15 | /** 16 | * Package for WordEmbedding training __(Unstable)__ 17 | */ 18 | package object wordvec { 19 | 20 | /** Pattern for real number **/ 21 | final val PATTERN_REAL = Pattern.compile("^[0-9]+\\.[0-9]+$", Pattern.UNICODE_CHARACTER_CLASS) 22 | final val PATTERN_REAL_WITHIN = Pattern.compile("\\s+[0-9]+\\.[0-9]+\\s+", Pattern.UNICODE_CHARACTER_CLASS) 23 | /** Pattern for integer **/ 24 | final val PATTERN_INTEGER = Pattern.compile("^[0-9]+$", Pattern.UNICODE_CHARACTER_CLASS) 25 | /** Pattern for Punctuation **/ 26 | final val PATTERN_PUNCT = Pattern.compile("(\\p{Punct})", Pattern.UNICODE_CHARACTER_CLASS) 27 | /** Pattern for Special Range **/ 28 | final val PATTERN_SPECIAL = Pattern.compile("^≪[A-Z]+≫$", Pattern.UNICODE_CHARACTER_CLASS) 29 | 30 | /** 31 | * Word Filter type 32 | */ 33 | trait WordFilter extends (String ⇒ String) with Serializable { 34 | /** 35 | * Tokenize given string using this filter 36 | * @param str String for tokenize 37 | * @return Array of tokens 38 | */ 39 | def tokenize(str: String): mutable.WrappedArray[String] 40 | 41 | /** 42 | * Save this filter into given path 43 | * @param path Path to save. 44 | */ 45 | def saveAs(path: Path): this.type = saveAs(File(path)) 46 | 47 | /** 48 | * Save this filter into given file 49 | * @param file File to save. 50 | */ 51 | def saveAs(file: File): this.type = { 52 | val oos = new ObjectOutputStream(file.outputStream()) 53 | oos.writeObject(this) 54 | oos.close() 55 | this 56 | } 57 | } 58 | 59 | /** 60 | * __WordFilter__ : Filter class for take only specific language area. 61 | * @param langFilter Regular Expression String indicating accepted Unicode area. 62 | */ 63 | case class LangFilter(langFilter: String) extends WordFilter{ 64 | val langPattern = Pattern.compile(s"[^$langFilter\\p{Punct}]+", Pattern.UNICODE_CHARACTER_CLASS) 65 | 66 | def tokenize(str: String): mutable.WrappedArray[String] = { 67 | val withReal = PATTERN_REAL_WITHIN.matcher(s" $str ") 68 | .replaceAll(" ≪REALNUM≫ ").trim() 69 | PATTERN_PUNCT.matcher(withReal).replaceAll(" $1 ").split("\\s+") 70 | .transform(apply) 71 | } 72 | 73 | /** 74 | * Normalize words 75 | * @param word Word String to be normalized 76 | * @return Normalized word string. 
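* @example Expected normalization for a few sample tokens under the ASCII-only filter
*          (an illustrative sketch, not an exhaustive specification):
* {{{
* val f = LangFilter("\\u0000-\\u007f")
* f("3.14")    // "≪REALNUM≫"
* f("2015")    // "≪NUMBERS≫"
* f("한국어")   // "≪FOREIGN≫" (outside the accepted language area)
* f("word")    // "word" (kept as-is)
* }}}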
77 | */ 78 | def apply(word: String) = 79 | if (PATTERN_SPECIAL.matcher(word).find()){ 80 | // Remain those functional words. 81 | word 82 | } else if (PATTERN_REAL.matcher(word).find()) { 83 | "≪REALNUM≫" 84 | } else if (PATTERN_INTEGER.matcher(word).find()) { 85 | "≪NUMBERS≫" 86 | } else if (langPattern.matcher(word).find()) { 87 | "≪FOREIGN≫" 88 | } else 89 | word 90 | } 91 | 92 | /** 93 | * Word2Vec model class. 94 | * @param map Mapping between String to Array[Coord] 95 | */ 96 | class WordModel(val map: util.HashMap[String, Array[Scalar]]) extends Serializable with (String ⇒ ScalarMatrix) { 97 | private final val OTHER_VEC = map(WordModel.OTHER_UNK) 98 | lazy val vectorSize = map.head._2.length 99 | private var filter: WordFilter = LangFilter("\\u0000-\\u007f") 100 | 101 | /** 102 | * Load Word Filter 103 | * @param path Path where Serialized Filter saved 104 | */ 105 | def loadFilter(path: Path): this.type = loadFilter(File(path)) 106 | 107 | /** 108 | * Load Word Filter 109 | * @param file File where Serialized Filter saved 110 | */ 111 | def loadFilter(file: File): this.type = { 112 | if (file.exists && file.isFile) { 113 | val ois = new ObjectInputStream(file.inputStream()) 114 | val filter = ois.readObject().asInstanceOf[WordFilter] 115 | ois.close() 116 | setFilter(filter) 117 | } 118 | 119 | this 120 | } 121 | 122 | /** 123 | * Set Word Filter 124 | * @param newFilter Filter to be set 125 | */ 126 | def setFilter(newFilter: WordFilter) = { 127 | filter = newFilter 128 | } 129 | 130 | /** 131 | * Get Matrix(Vector) of given word 132 | * @param word Word string for search 133 | * @return Column Vector of given word 134 | */ 135 | def apply(word: String) = { 136 | val vec = map.getOrDefault(filter(word), OTHER_VEC) 137 | ScalarMatrix(vec:_*) 138 | } 139 | 140 | /** 141 | * Tokenize given string using word filter 142 | * @param str String to tokenize 143 | * @return Tokenized string (WrappedArray) 144 | */ 145 | def tokenize(str: String) = filter.tokenize(str) 146 | 147 | /** 148 | * Tokenize given string and take average vector of them 149 | * @param str String to compute 150 | * @return Average word embedding of given string. 151 | */ 152 | def tokenizeAndApply(str: String):ScalarMatrix = { 153 | val array = filter.tokenize(str) 154 | val len = array.length 155 | val res = ScalarMatrix $0 (vectorSize, 1) 156 | var i = len 157 | while(i > 0){ 158 | i -= 1 159 | val vec = map.getOrDefault(array(i), OTHER_VEC) 160 | var d = vectorSize 161 | while(d > 0){ 162 | d -= 1 163 | res(d, 0) += vec(d) / len.toFloat 164 | } 165 | } 166 | 167 | res 168 | } 169 | 170 | /** 171 | * Check existance of given word 172 | * @param word Word string for search 173 | * @return True if it is in the list 174 | */ 175 | def contains(word: String) = map.containsKey(filter(word)) 176 | 177 | /** 178 | * Write model into given path. 179 | * @param path Path where to store. 180 | */ 181 | def saveAs(path: Path): Unit = saveAs(File(path)) 182 | 183 | /** 184 | * Write model into given file. 
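* @example Persisting a model as plain text (a sketch; `model` is an existing [[WordModel]]
*          and the file name is a placeholder):
* {{{
* model.saveAs(File("wordvec.txt"))
* }}}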
185 | * @param file File where to store 186 | */ 187 | def saveAs(file: File): Unit = { 188 | val bw = file.bufferedWriter(append = false, codec = Codec.UTF8) 189 | map.foreach { 190 | case (word, vec) ⇒ 191 | bw.write(s"$word\t") 192 | val str = vec.map { 193 | v ⇒ f"$v%.8f" 194 | }.mkString(" ") 195 | bw.write(str) 196 | } 197 | bw.close() 198 | } 199 | } 200 | 201 | /** 202 | * Companion object of [[WordModel]] 203 | */ 204 | object WordModel extends Serializable { 205 | final val OTHER_UNK = "≪UNKNOWN≫" 206 | val logger = Logger.getLogger(this.getClass) 207 | 208 | /** 209 | * Restore Word Model from Path. 210 | * @param path Path of word model file. 211 | * @param normalize True if you want vectors are normalized by longest length of vector. 212 | * @return WordModel restored from file. 213 | */ 214 | def apply(path: Path, normalize: Boolean): WordModel = apply(File(path), normalize) 215 | 216 | /** 217 | * Restore WordModel from File. 218 | * @param file File where to read 219 | * @param normalize True if you want vectors are normalized by longest length of vector. 220 | * @return WordModel restored from file. 221 | */ 222 | def apply(file: File, normalize: Boolean = false): WordModel = { 223 | val path = file.path + (if (normalize) ".norm.obj" else ".orig.obj") 224 | if (File(path).exists) { 225 | val in = new ObjectInputStream(File(path).inputStream()) 226 | val model = in.readObject().asInstanceOf[WordModel] 227 | in.close() 228 | 229 | logger info "READ Word2Vec finished." 230 | model 231 | } else { 232 | val br = file.bufferedReader(Codec.UTF8) 233 | val firstLine = br.readLine().split("\\s+") 234 | val mapSize = firstLine(0).toInt 235 | val vectorSize = firstLine(1).toInt 236 | 237 | val buffer = new util.HashMap[String, Array[Scalar]]() 238 | var lineNo = mapSize 239 | var maxlen = 0.0f 240 | 241 | while (lineNo > 0) { 242 | lineNo -= 1 243 | if (lineNo % 10000 == 0) 244 | logger info f"READ Word2Vec file : $lineNo%9d/$mapSize%9d" 245 | 246 | val line = br.readLine() 247 | val splits = line.split("\\s+") 248 | val word = splits(0) 249 | val vector = splits.view.slice(1, vectorSize + 1).map(_.toFloat).force 250 | val len = vector.map(Math.abs).max 251 | require(vector.length == vectorSize, s"'$word' Vector is broken! Read size ${vector.length}, but expected $vectorSize") 252 | 253 | if (maxlen < len) 254 | maxlen = len 255 | buffer += word → vector 256 | } 257 | 258 | br.close() 259 | 260 | if (normalize && maxlen > 0f) { 261 | logger info f"READ Word2Vec file : Maximum absolute value of entry in vector matrix = $maxlen%.4f" 262 | buffer.foreach { 263 | case (_, vec) ⇒ 264 | var i = vec.length 265 | while (i > 0) { 266 | i -= 1 267 | vec.update(i, vec(i) / maxlen) 268 | } 269 | } 270 | } 271 | 272 | val model = new WordModel(buffer) 273 | val stream = new ObjectOutputStream(File(path).outputStream()) 274 | stream.writeObject(model) 275 | stream.close() 276 | 277 | logger info "READ Word2Vec finished." 278 | model 279 | } 280 | } 281 | } 282 | } 283 | --------------------------------------------------------------------------------
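The `WordModel` API above can also be exercised without a pre-trained Word2Vec file. Below is a minimal, self-contained sketch (the vocabulary, vector values and object name are made up for illustration): it builds a toy vocabulary, attaches a language filter, and looks words up as column vectors.

```scala
import java.util

import kr.ac.kaist.ir.deep.fn._
import kr.ac.kaist.ir.deep.wordvec._

object WordModelSketch extends App {
  // Toy 3-dimensional vocabulary. The ≪UNKNOWN≫ entry is mandatory:
  // WordModel falls back to it for every out-of-vocabulary word.
  val map = new util.HashMap[String, Array[Scalar]]()
  map.put(WordModel.OTHER_UNK, Array(0f, 0f, 0f))
  map.put("cat", Array(0.1f, 0.2f, 0.3f))

  val model = new WordModel(map)
  model.setFilter(LangFilter("\\u0000-\\u007f")) // accept ASCII words only

  println(model("cat"))                         // 3x1 column vector of "cat"
  println(model("unseen"))                      // falls back to the ≪UNKNOWN≫ vector
  println(model.tokenizeAndApply("cat unseen")) // element-wise average of both vectors
}
```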