├── .gitignore
├── LICENSE
├── README.md
├── build.sbt
├── pom.xml
├── project
│   └── plugins.sbt
├── scalastyle-config.xml
└── src
    ├── main
    │   └── scala
    │       ├── org
    │       │   └── apache
    │       │       └── spark
    │       │           └── ml
    │       │               └── scaladl
    │       │                   ├── MultilayerPerceptronClassifier.scala
    │       │                   └── StackedAutoencoder.scala
    │       └── scaladl
    │           ├── examples
    │           │   ├── MnistClassification.scala
    │           │   └── MnistEncoding.scala
    │           ├── layers
    │           │   ├── Layer.scala
    │           │   └── LossFunction.scala
    │           ├── optimization
    │           │   ├── Gradient.scala
    │           │   ├── GradientDescent.scala
    │           │   ├── LBFGS.scala
    │           │   ├── Optimizer.scala
    │           │   └── Updater.scala
    │           └── tensor
    │               └── DenseTensor.scala
    └── test
        └── scala
            ├── org
            │   └── apache
            │       └── spark
            │           └── ml
            │               └── scaladl
            │                   ├── ANNSpeedSuite.scala
            │                   ├── MultilayerPerceptronClassifierSuite.scala
            │                   └── StackedAutoencoderSuite.scala
            └── scaladl
                ├── layers
                │   ├── GradientSuite.scala
                │   └── LayerSuite.scala
                ├── tensor
                │   ├── DenseTensorSuite.scala
                │   ├── NumericBoxingTest.scala
                │   └── TypedClassTest.scala
                └── util
                    └── SparkTestContext.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | # use glob syntax.
2 | syntax: glob
3 | *.ser
4 | *.class
5 | *~
6 | *.bak
7 | #*.off
8 | *.old
9 |
10 | # eclipse conf file
11 | .settings
12 | .classpath
13 | .project
14 | .manager
15 | .scala_dependencies
16 |
17 | # idea
18 | .idea
19 | *.iml
20 |
21 | # building
22 | target
23 | build
24 | null
25 | tmp*
26 | temp*
27 | dist
28 | test-output
29 | build.log
30 |
31 | # other scm
32 | .svn
33 | .CVS
34 | .hg*
35 |
36 | # switch to regexp syntax.
37 | # syntax: regexp
38 | # ^\.pc/
39 |
40 | #SHITTY output not in target directory
41 | build.log
42 |
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # A Scalable Implementation of Deep Learning on Spark
2 | This library is based on the implementation of artificial neural networks in [Spark ML](https://spark.apache.org/docs/latest/ml-classification-regression.html#multilayer-perceptron-classifier). In addition to the multilayer perceptron, it contains new [Spark deep learning features](https://issues.apache.org/jira/browse/SPARK-5575) that have not yet been merged into Spark ML. Currently, these are the Stacked Autoencoder and tensor data flow. Highlights of the library:
3 | - Provides the Spark ML pipeline API
4 | - Implements data-parallel training
5 | - Supports native CPU BLAS
6 | - Employs tensor data flow
7 | - Provides an extensible API for developers of new features
8 |
9 | ## Installation
10 | ### Requirements
11 | - Apache Spark 2.0 or higher
12 | - Java and Scala
13 | - Maven
14 |
15 | ### Build
16 | Clone and compile:
17 | ```
18 | git clone https://github.com/avulanov/scalable-deeplearning.git
19 | cd scalable-deeplearning
20 | sbt assembly (or mvn package)
21 | ```
22 | The jar library will be available in the `target` folder. The `assembly` build includes the optimized numerical processing library netlib-java. Optionally, one can build a plain `package` instead.
23 |
24 | ### Performance configuration
25 | Scaladl uses the [netlib-java](https://github.com/fommil/netlib-java) library for optimized numerical processing with native [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms). All netlib-java classes are included in scaladl.jar, which has to be on the classpath before Spark's own libraries because Spark bundles only a subset of netlib. To do this, set `spark.driver.userClassPathFirst` to `true` in `spark-defaults.conf`.
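For illustration, the setting can be placed in `conf/spark-defaults.conf` or passed with `--conf` when launching Spark; the jar path below is only a placeholder for wherever your scaladl assembly lives:
```
# conf/spark-defaults.conf
spark.driver.userClassPathFirst  true

# or, equivalently, on the command line
./spark-shell --conf spark.driver.userClassPathFirst=true --jars /path/to/scaladl.jar
```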
26 |
27 | If native BLAS libraries are not available at runtime, or scaladl.jar is not first in the classpath, you will see the warning `WARN BLAS: Failed to load implementation from:` and the reference (pure JVM) implementation will be used. A native BLAS library such as OpenBLAS (`libopenblas.so` or `.dll`) or ATLAS (`libatlas.so`) has to be on the library path of all nodes that run Spark. Netlib-java requires the library to be named `libblas.so.3`, so one has to create a symlink; the same applies to `libblas3.dll` on Windows. Setup details for the different platforms are below. With a proper configuration you will see the message `INFO JniLoader: successfully loaded ...netlib-native_system-....`
28 |
29 | ### Linux:
30 | Install a native BLAS library (depending on your distribution):
31 | ```
32 | yum install openblas    # or: apt-get install openblas, or download and compile OpenBLAS
33 | ```
34 | Create a symlink to the native BLAS library within its folder `/your/blas`:
35 | ```
36 | ln -s libopenblas.so libblas.so.3
37 | ```
38 | Add the folder to your library path. Make sure there is no other folder with `libblas.so.3` in your path.
39 | ```
40 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/your/blas
41 | ```
42 | ### Windows:
43 | Copy the following DLLs from the MinGW distribution and from OpenBLAS to a folder `blas`. Make sure they are all the same 64- or 32-bit build. Add that folder to your `PATH` variable.
44 | ```
45 | libquadmath-0.dll    // MinGW
46 | libgcc_s_seh-1.dll   // MinGW
47 | libgfortran-3.dll    // MinGW
48 | libopenblas.dll      // OpenBLAS binary
49 | liblapack3.dll       // copy of libopenblas.dll
50 | libblas3.dll         // copy of libopenblas.dll
51 | ```
52 | - MinGW https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win64/Automated%20Builds/
53 | - OpenBLAS http://www.openblas.net/
54 |
55 | ## Example of use
56 | ### Built-in examples
57 | Scaladl provides working examples of MNIST classification and of pre-training with a stacked autoencoder. The examples are in the [`scaladl.examples`](https://github.com/avulanov/scalable-deeplearning/tree/master/src/main/scala/scaladl/examples) package. They can be run via spark-submit:
58 | ```
59 | ./spark-submit --class scaladl.examples.MnistClassification --master spark://master:7077 /path/to/scaladl.jar /path/to/mnist-libsvm
60 | ```
61 | ### Spark shell
62 | Start the Spark shell with this library:
63 | ```
64 | ./spark-shell --jars scaladl.jar
65 | ```
66 | Or use it as an external dependency for your application.
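If you prefer declaring scaladl as an external dependency rather than passing the assembly jar explicitly, a minimal sbt sketch is below. The resolver mirrors `project/plugins.sbt`; the artifact coordinates and version are assumptions based on `build.sbt` (`spName := "avulanov/scalable-deeplearning"`), so check the published package before relying on them:
```
// build.sbt of your application (sketch; coordinates are assumptions)
resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"  % "2.0.0" % "provided",
  "org.apache.spark" %% "spark-mllib" % "2.0.0" % "provided",
  // hypothetical coordinates for the scaladl spark-package
  "avulanov" % "scalable-deeplearning" % "1.0.0"
)
```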
67 |
68 | ### Multilayer perceptron
69 | MNIST classification:
70 | - Load the MNIST handwritten digit recognition data stored in [LIBSVM format](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html) as a DataFrame
71 | - Initialize the multilayer perceptron classifier with 784 inputs, 32 neurons in the hidden layer and 10 outputs
72 | - Train and predict
73 |
74 | ```scala
75 | import org.apache.spark.ml.scaladl.MultilayerPerceptronClassifier
76 | val train = spark.read.format("libsvm").option("numFeatures", 784).load("mnist.scale").persist()
77 | val test = spark.read.format("libsvm").option("numFeatures", 784).load("mnist.scale.t").persist()
78 | train.count() // materialize the lazily persisted data in memory
79 | test.count() // materialize the lazily persisted data in memory
80 | val trainer = new MultilayerPerceptronClassifier().setLayers(Array(784, 32, 10)).setMaxIter(100)
81 | val model = trainer.fit(train)
82 | val result = model.transform(test)
83 | ```
84 | ### Stacked Autoencoder
85 | Pre-training:
86 | - Load the MNIST data
87 | - Initialize the stacked autoencoder with 784 inputs and 32 neurons in the hidden layer
88 | - Train the stacked autoencoder
89 | - Initialize the multilayer perceptron classifier with 784 inputs, 32 neurons in the hidden layer and 10 outputs, copy the encoder weights into its initial weights, and train it
90 | ```scala
91 | import org.apache.spark.ml.scaladl.{MultilayerPerceptronClassifier, StackedAutoencoder}
92 | val train = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTrain).persist()
93 | train.count()
94 | val stackedAutoencoder = new StackedAutoencoder().setLayers(Array(784, 32))
95 |   .setInputCol("features")
96 |   .setOutputCol("output")
97 |   .setDataIn01Interval(true)
98 |   .setBuildDecoder(false)
99 | val saModel = stackedAutoencoder.fit(train)
100 | val autoWeights = saModel.encoderWeights
101 | val trainer = new MultilayerPerceptronClassifier().setLayers(Array(784, 32, 10)).setMaxIter(1)
102 | val initialWeights = trainer.fit(train).weights
103 | System.arraycopy(autoWeights.toArray, 0, initialWeights.toArray, 0, autoWeights.toArray.length)
104 | trainer.setInitialWeights(initialWeights).setMaxIter(10)
105 | val model = trainer.fit(train)
106 | ```
107 | ## Contributions
108 | Contributions are welcome, in particular in the following areas:
109 | - New layers
110 |   - Convolutional
111 |   - ReLU
112 | - Flexibility
113 |   - Implement a reader for Caffe or other deep learning configuration formats
114 |   - Implement Python/R/Java interfaces
115 | - Efficiency
116 |   - Switch from double to single precision
117 |   - Implement wrappers for specialized deep learning libraries, e.g.
TensorFlow 118 | - Refactoring 119 | - Implement own version of L-BFGS to remove dependency on breeze 120 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | name := "scalable-deeplearning" 2 | 3 | version := "1.0.0" 4 | 5 | scalaVersion := "2.11.7" 6 | 7 | spName := "avulanov/scalable-deeplearning" 8 | 9 | spShade := true 10 | 11 | sparkVersion := "2.0.0" 12 | 13 | libraryDependencies ++= Seq( 14 | "com.github.fommil.netlib" % "all" % "1.1.2", 15 | "org.scalatest" % "scalatest_2.11" % "2.2.4" % "test" 16 | ) 17 | 18 | sparkComponents += "mllib" 19 | 20 | // libraryDependencies ++= Seq( 21 | // "org.apache.spark" % "spark-core_2.11" % "2.0.0" % "provided", 22 | // "org.apache.spark" % "spark-mllib_2.11" % "2.0.0" % "provided" 23 | // ) 24 | 25 | test in assembly := {} 26 | 27 | assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | scaladl 4 | scaladl 5 | 1.0.0 6 | ${project.artifactId} 7 | Scalable implementation of Deep Learning for Spark 8 | 2016 9 | 10 | 11 | Apache 12 | http://.... 13 | repo 14 | 15 | 16 | 17 | 18 | 1.6 19 | 1.6 20 | UTF-8 21 | 2.11.5 22 | 2.11 23 | 24 | 25 | 26 | 27 | org.scala-lang 28 | scala-library 29 | ${scala.version} 30 | provided 31 | 32 | 33 | com.github.fommil.netlib 34 | all 35 | 1.1.2 36 | pom 37 | 38 | 39 | org.apache.spark 40 | spark-core_2.11 41 | 2.0.0 42 | provided 43 | 44 | 45 | org.apache.spark 46 | spark-mllib_2.11 47 | 2.0.0 48 | provided 49 | 50 | 51 | 52 | 53 | junit 54 | junit 55 | 4.11 56 | test 57 | 58 | 59 | org.specs2 60 | specs2-core_${scala.compat.version} 61 | 2.4.16 62 | test 63 | 64 | 65 | org.scalatest 66 | scalatest_${scala.compat.version} 67 | 2.2.4 68 | test 69 | 70 | 71 | 72 | 73 | src/main/scala 74 | src/test/scala 75 | 76 | 77 | 78 | net.alchim31.maven 79 | scala-maven-plugin 80 | 3.2.0 81 | 82 | 83 | 84 | compile 85 | testCompile 86 | 87 | 88 | 89 | -dependencyfile 90 | ${project.build.directory}/.scala_dependencies 91 | 92 | 93 | 94 | 95 | 96 | 97 | org.apache.maven.plugins 98 | maven-surefire-plugin 99 | 2.18.1 100 | 101 | false 102 | true 103 | 104 | 105 | 106 | **/*Test.* 107 | **/*Suite.* 108 | 109 | 110 | 111 | 112 | org.scalastyle 113 | scalastyle-maven-plugin 114 | 0.8.0 115 | 116 | false 117 | true 118 | false 119 | false 120 | ${basedir}/src/main/scala 121 | ${basedir}/src/test/scala 122 | scalastyle-config.xml 123 | ${basedir}/target/scalastyle-output.xml 124 | UTF-8 125 | UTF-8 126 | 127 | 128 | 129 | 130 | check 131 | 132 | 133 | 134 | 135 | 136 | maven-assembly-plugin 137 | 2.6 138 | 139 | 140 | 141 | scaladl.examples.MnistClassification 142 | 143 | 144 | 145 | jar-with-dependencies 146 | 147 | false 148 | 149 | 150 | 151 | package 152 | 153 | single 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.2") 2 | 3 | resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/" 4 | 5 | addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.5") 
-------------------------------------------------------------------------------- /scalastyle-config.xml: --------------------------------------------------------------------------------
[The XML markup of this Scalastyle configuration was lost in extraction; only text fragments survive. Recoverable content: the "Scalastyle standard configuration" enforces whitespace around tokens such as ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW; bans ^FunSuite[A-Za-z]*$ ("Tests must extend org.apache.spark.SparkFunSuite instead."), ^println$, @VisibleForTesting, Runtime.getRuntime.addShutdownHook, mutable.SynchronizedBuffer, Class.forName, Await.result, scala.collection.JavaConversions (import scala.collection.JavaConverters._ and use .asScala / .asJava methods), and org.apache.commons.lang (use Commons Lang 3 classes in org.apache.commons.lang3.*); requires Javadoc-style indentation for multiline comments, omitting braces in case clauses, and the override modifier instead of @java.lang.Override; orders imports into the groups java, scala, 3rdParty, spark with the patterns javax?\..*, scala\..*, (?!scaladl\.).*, scaladl\..*; and sets numeric limits of 800, 30, 10, and 50 with allowed magic numbers -1,0,1,2,3.]
-------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/ml/scaladl/MultilayerPerceptronClassifier.scala: --------------------------------------------------------------------------------
1 | /*
2 |  * Licensed to the Apache Software Foundation (ASF) under one or more
3 |  * contributor license agreements. See the NOTICE file distributed with
4 |  * this work for additional information regarding copyright ownership.
5 |  * The ASF licenses this file to You under the Apache License, Version 2.0
6 |  * (the "License"); you may not use this file except in compliance with
7 |  * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import scala.collection.JavaConverters._ 21 | 22 | import org.apache.spark.ml.{PredictionModel, Predictor, PredictorParams} 23 | import org.apache.spark.ml.feature.LabeledPoint 24 | import org.apache.spark.ml.linalg.{Vector, Vectors} 25 | import org.apache.spark.ml.param.{DoubleParam, IntArrayParam, IntParam, Param, ParamMap, 26 | ParamValidators} 27 | import org.apache.spark.ml.param.shared.{HasMaxIter, HasSeed, HasTol} 28 | import org.apache.spark.ml.util.Identifiable 29 | import org.apache.spark.sql.Dataset 30 | 31 | import scaladl.layers.{FeedForwardTopology, FeedForwardTrainer} 32 | 33 | /** 34 | * Params for Multilayer Perceptron. 35 | */ 36 | private[ml] trait MultilayerPerceptronParams extends PredictorParams 37 | with HasSeed with HasMaxIter with HasTol { 38 | /** 39 | * Layer sizes including input size and output size. 40 | * Default: Array(1, 1) 41 | * 42 | * @group param 43 | */ 44 | final val layers: IntArrayParam = new IntArrayParam(this, "layers", 45 | "Sizes of layers from input layer to output layer" + 46 | " E.g., Array(780, 100, 10) means 780 inputs, " + 47 | "one hidden layer with 100 neurons and output layer of 10 neurons.", 48 | // TODO: how to check ALSO that all elements are greater than 0? 49 | ParamValidators.arrayLengthGt(1) 50 | ) 51 | 52 | /** @group getParam */ 53 | final def getLayers: Array[Int] = $(layers) 54 | 55 | /** 56 | * Block size for stacking input data in matrices to speed up the computation. 57 | * Data is stacked within partitions. If block size is more than remaining data in 58 | * a partition then it is adjusted to the size of this data. 59 | * Recommended size is between 10 and 1000. 60 | * Default: 128 61 | * 62 | * @group expertParam 63 | */ 64 | final val blockSize: IntParam = new IntParam(this, "blockSize", 65 | "Block size for stacking input data in matrices. Data is stacked within partitions." + 66 | " If block size is more than remaining data in a partition then " + 67 | "it is adjusted to the size of this data. Recommended size is between 10 and 1000", 68 | ParamValidators.gt(0)) 69 | 70 | /** @group getParam */ 71 | final def getBlockSize: Int = $(blockSize) 72 | 73 | /** 74 | * Optimizer setup. 75 | * 76 | * @group expertParam 77 | */ 78 | final val optimizer: Param[String] = new Param[String](this, "optimizer", 79 | " Allows setting the optimizer: minibatch gradient descent (GD) or LBFGS. " + 80 | " The latter is recommended one. ", 81 | ParamValidators.inArray[String](Array("GD", "LBFGS"))) 82 | 83 | /** @group getParam */ 84 | final def getOptimizer: String = $(optimizer) 85 | 86 | /** 87 | * Learning rate. 88 | * 89 | * @group expertParam 90 | */ 91 | final val learningRate: DoubleParam = new DoubleParam(this, "learning rate", 92 | " Sets the learning rate for gradient descent optimizer ", 93 | ParamValidators.inRange(0, 1)) 94 | 95 | /** @group getParam */ 96 | final def getLearningRate: Double = $(learningRate) 97 | 98 | 99 | /** 100 | * The initial weights of the model. 
101 | * 102 | * @group expertParam 103 | */ 104 | final val initialWeights: Param[Vector] = new Param[Vector](this, "initialWeights", 105 | "The initial weights of the model") 106 | 107 | /** @group expertGetParam */ 108 | final def getInitialWeights: Vector = $(initialWeights) 109 | 110 | setDefault(maxIter -> 100, tol -> 1e-6, layers -> Array(1, 1), blockSize -> 128, 111 | optimizer -> "LBFGS", learningRate -> 0.03) 112 | } 113 | 114 | /** Label to vector converter. */ 115 | private object LabelConverter { 116 | // TODO: Use OneHotEncoder instead 117 | /** 118 | * Encodes a label as a vector. 119 | * Returns a vector of given length with zeroes at all positions 120 | * and value 1.0 at the position that corresponds to the label. 121 | * 122 | * @param labeledPoint labeled point 123 | * @param labelCount total number of labels 124 | * @return pair of features and vector encoding of a label 125 | */ 126 | def encodeLabeledPoint(labeledPoint: LabeledPoint, labelCount: Int): (Vector, Vector) = { 127 | val output = Array.fill(labelCount)(0.0) 128 | output(labeledPoint.label.toInt) = 1.0 129 | (labeledPoint.features, Vectors.dense(output)) 130 | } 131 | 132 | /** 133 | * Converts a vector to a label. 134 | * Returns the position of the maximal element of a vector. 135 | * 136 | * @param output label encoded with a vector 137 | * @return label 138 | */ 139 | def decodeLabel(output: Vector): Double = { 140 | output.argmax.toDouble 141 | } 142 | } 143 | 144 | /** 145 | * Classifier trainer based on the Multilayer Perceptron. 146 | * Each layer has sigmoid activation function, output layer has softmax. 147 | * Number of inputs has to be equal to the size of feature vectors. 148 | * Number of outputs has to be equal to the total number of labels. 149 | * 150 | */ 151 | class MultilayerPerceptronClassifier (override val uid: String) 152 | extends Predictor[Vector, MultilayerPerceptronClassifier, MultilayerPerceptronClassificationModel] 153 | with MultilayerPerceptronParams { 154 | 155 | def this() = this(Identifiable.randomUID("mlpc-scaladl")) 156 | 157 | /** @group setParam */ 158 | def setLayers(value: Array[Int]): this.type = set(layers, value) 159 | 160 | /** @group setParam */ 161 | def setBlockSize(value: Int): this.type = set(blockSize, value) 162 | 163 | /** 164 | * Set the maximum number of iterations. 165 | * Default is 100. 166 | * 167 | * @group setParam 168 | */ 169 | def setMaxIter(value: Int): this.type = set(maxIter, value) 170 | 171 | /** 172 | * Set the convergence tolerance of iterations. 173 | * Smaller value will lead to higher accuracy with the cost of more iterations. 174 | * Default is 1E-4. 175 | * 176 | * @group setParam 177 | */ 178 | def setTol(value: Double): this.type = set(tol, value) 179 | 180 | /** 181 | * Set the seed for weights initialization if weights are not set 182 | * 183 | * @group setParam 184 | */ 185 | def setSeed(value: Long): this.type = set(seed, value) 186 | 187 | /** 188 | * Sets the value of param [[initialWeights]]. 189 | * 190 | * @group expertSetParam 191 | */ 192 | def setInitialWeights(value: Vector): this.type = set(initialWeights, value) 193 | 194 | /** 195 | * Generate weights. 196 | */ 197 | def generateWeights(): Vector = { 198 | val topology = FeedForwardTopology.multiLayerPerceptron($(layers), true) 199 | topology.model($(seed)).weights 200 | } 201 | 202 | override def copy(extra: ParamMap): MultilayerPerceptronClassifier = defaultCopy(extra) 203 | 204 | /** 205 | * Train a model using the given dataset and parameters. 
206 | * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation 207 | * and copying parameters into the model. 208 | * 209 | * @param dataset Training dataset 210 | * @return Fitted model 211 | */ 212 | override protected def train(dataset: Dataset[_]): MultilayerPerceptronClassificationModel = { 213 | val myLayers = $(layers) 214 | val labels = myLayers.last 215 | val lpData = extractLabeledPoints(dataset) 216 | val data = lpData.map(lp => LabelConverter.encodeLabeledPoint(lp, labels)) 217 | val topology = FeedForwardTopology.multiLayerPerceptron(myLayers, true) 218 | val trainer = new FeedForwardTrainer(topology, myLayers(0), myLayers.last) 219 | if (isDefined(initialWeights)) { 220 | trainer.setWeights($(initialWeights)) 221 | } else { 222 | trainer.setSeed($(seed)) 223 | } 224 | trainer.LBFGSOptimizer 225 | .setConvergenceTol($(tol)) 226 | .setNumIterations($(maxIter)) 227 | trainer.setStackSize($(blockSize)) 228 | val mlpModel = trainer.train(data) 229 | new MultilayerPerceptronClassificationModel(uid, myLayers, mlpModel.weights) 230 | } 231 | } 232 | 233 | /** 234 | * Classification model based on the Multilayer Perceptron. 235 | * Each layer has sigmoid activation function, output layer has softmax. 236 | * 237 | * @param uid uid 238 | * @param layers array of layer sizes including input and output layers 239 | * @param weights vector of initial weights for the model that consists of the weights of layers 240 | * @return prediction model 241 | */ 242 | class MultilayerPerceptronClassificationModel private[ml] (override val uid: String, 243 | val layers: Array[Int], 244 | val weights: Vector) 245 | extends PredictionModel[Vector, MultilayerPerceptronClassificationModel] 246 | with Serializable { 247 | 248 | override val numFeatures: Int = layers.head 249 | 250 | private val mlpModel = FeedForwardTopology.multiLayerPerceptron(layers, true).model(weights) 251 | 252 | /** 253 | * Returns layers in a Java List. 254 | */ 255 | private[ml] def javaLayers: java.util.List[Int] = { 256 | layers.toList.asJava 257 | } 258 | 259 | /** 260 | * Predict label for the given features. 261 | * This internal method is used to implement [[transform()]] and output [[predictionCol]]. 262 | */ 263 | override protected def predict(features: Vector): Double = { 264 | LabelConverter.decodeLabel(mlpModel.predict(features)) 265 | } 266 | 267 | override def copy(extra: ParamMap): MultilayerPerceptronClassificationModel = { 268 | copyValues(new MultilayerPerceptronClassificationModel(uid, layers, weights), extra) 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/ml/scaladl/StackedAutoencoder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import breeze.linalg.{DenseVector => BDV} 21 | import org.apache.spark.ml.{Estimator, Model} 22 | import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} 23 | import org.apache.spark.ml.param.{BooleanParam, ParamMap, Params} 24 | import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} 25 | import org.apache.spark.ml.util.Identifiable 26 | import org.apache.spark.sql.{DataFrame, Dataset, Row} 27 | import org.apache.spark.sql.functions._ 28 | import org.apache.spark.sql.types.{StructField, StructType} 29 | import org.apache.spark.storage.StorageLevel 30 | 31 | import scaladl.layers.{EmptyLayerWithSquaredError, FeedForwardTopology, FeedForwardTrainer} 32 | 33 | /** 34 | * Params for [[StackedAutoencoder]]. 35 | */ 36 | private[scaladl] trait StackedAutoencoderParams extends Params with HasInputCol with HasOutputCol { 37 | /** 38 | * True if data is in [0, 1] interval. 39 | * Default: false 40 | * 41 | * @group expertParam 42 | */ 43 | final val dataIn01Interval: BooleanParam = new BooleanParam(this, "dataIn01Interval", 44 | "True if data is in [0, 1] interval." + 45 | " Sets the layer on the top of the autoencoder: linear + sigmoid (true) " + 46 | " or linear (false)") 47 | 48 | /** @group getParam */ 49 | final def getDataIn01Interval: Boolean = $(dataIn01Interval) 50 | 51 | /** 52 | * True if one wants to have decoder. 53 | * Default: false 54 | * 55 | * @group expertParam 56 | */ 57 | final val buildDecoder: BooleanParam = new BooleanParam(this, "buildDecoder", 58 | "True to produce a decoder.") 59 | 60 | /** @group getParam */ 61 | final def getBuildDecoder: Boolean = $(buildDecoder) 62 | 63 | /** 64 | * True to cache the intermediate data in memory. Otherwise disk caching is used. 65 | * Default: true 66 | * 67 | * @group expertParam 68 | */ 69 | final val memoryOnlyCaching: BooleanParam = new BooleanParam(this, "memoryOnlyCaching", 70 | "True to cache the intermediate data in memory only.") 71 | 72 | /** @group getParam */ 73 | final def getMemoryOnlyCaching: Boolean = $(memoryOnlyCaching) 74 | 75 | setDefault(dataIn01Interval -> true, buildDecoder -> false, memoryOnlyCaching -> true) 76 | } 77 | 78 | class StackedAutoencoder (override val uid: String) 79 | extends Estimator[StackedAutoencoderModel] 80 | with MultilayerPerceptronParams with StackedAutoencoderParams { 81 | 82 | def this() = this(Identifiable.randomUID("stackedAutoencoder")) 83 | 84 | /** @group setParam */ 85 | def setDataIn01Interval(value: Boolean): this.type = set(dataIn01Interval, value) 86 | 87 | /** @group setParam */ 88 | def setBuildDecoder(value: Boolean): this.type = set(buildDecoder, value) 89 | 90 | // TODO: make sure that user understands how to set it. Make correctness check 91 | /** @group setParam */ 92 | def setLayers(value: Array[Int]): this.type = set(layers, value) 93 | 94 | /** @group setParam */ 95 | def setBlockSize(value: Int): this.type = set(blockSize, value) 96 | 97 | /** @group setParam */ 98 | def setInputCol(value: String): this.type = set(inputCol, value) 99 | 100 | /** @group setParam */ 101 | def setOutputCol(value: String): this.type = set(outputCol, value) 102 | 103 | /** 104 | * Set the maximum number of iterations. 105 | * Default is 100. 
106 | * 107 | * @group setParam 108 | */ 109 | def setMaxIter(value: Int): this.type = set(maxIter, value) 110 | 111 | /** 112 | * Set the convergence tolerance of iterations. 113 | * Smaller value will lead to higher accuracy with the cost of more iterations. 114 | * Default is 1E-4. 115 | * 116 | * @group setParam 117 | */ 118 | def setTol(value: Double): this.type = set(tol, value) 119 | 120 | /** 121 | * Set the seed for weights initialization. 122 | * 123 | * @group setParam 124 | */ 125 | def setSeed(value: Long): this.type = set(seed, value) 126 | 127 | /** 128 | * Set the model weights. 129 | * 130 | * @group setParam 131 | */ 132 | def setInitialWeights(value: Vector): this.type = set(initialWeights, value) 133 | 134 | /** 135 | * Fits a model to the input data. 136 | */ 137 | override def fit(dataset: Dataset[_]): StackedAutoencoderModel = { 138 | val storageLevel = 139 | if ($(memoryOnlyCaching)) StorageLevel.MEMORY_ONLY else StorageLevel.DISK_ONLY 140 | var stackedEncoderOffset = 0 141 | val stackedEncoderWeights = if (!this.isSet(this.initialWeights)) { 142 | val size = 143 | FeedForwardTopology.multiLayerPerceptron($(layers)).layers.foldLeft(0)( (b, layer) => 144 | b + layer.weightSize) 145 | new Array[Double](size) 146 | } else { 147 | $(initialWeights).toArray 148 | } 149 | // decoder if needed 150 | var stackedDecoderOffset = 0 151 | val decoderLayers = $(layers).reverse 152 | val stackedDecoderWeights: Array[Double] = if ($(buildDecoder)) { 153 | val size = 154 | FeedForwardTopology.multiLayerPerceptron(decoderLayers).layers.foldLeft(0)( (b, layer) => 155 | b + layer.weightSize) 156 | stackedDecoderOffset = size 157 | new Array[Double](size) 158 | } else { 159 | new Array[Double](0) 160 | } 161 | // TODO: use single instance of vectors 162 | var data = dataset.select($(inputCol)).rdd.map { case Row(x: Vector) => (x, x) } 163 | var previousData = data 164 | val linearInput = !$(dataIn01Interval) 165 | // Train autoencoder for each layer except the last 166 | for (i <- 0 until $(layers).length - 1) { 167 | val currentLayers = Array($(layers)(i), $(layers)(i + 1), $(layers)(i)) 168 | val currentTopology = FeedForwardTopology.multiLayerPerceptron(currentLayers, false) 169 | val isLastLayer = i == $(layers).length - 2 170 | val isFirstLayer = i == 0 171 | if (isFirstLayer && linearInput) { 172 | currentTopology.layers(currentTopology.layers.length - 1) = new EmptyLayerWithSquaredError() 173 | } 174 | val FeedForwardTrainer = 175 | new FeedForwardTrainer(currentTopology, currentLayers(0), currentLayers.last) 176 | .setStackSize($(blockSize)) 177 | .setSeed($(seed)) 178 | FeedForwardTrainer.LBFGSOptimizer 179 | .setConvergenceTol($(tol)) 180 | .setNumIterations($(maxIter)) 181 | val currentModel = FeedForwardTrainer.train(data) 182 | val currentWeights = currentModel.weights.toArray 183 | val encoderWeightSize = currentTopology.layers(0).weightSize 184 | System.arraycopy( 185 | currentWeights, 0, stackedEncoderWeights, stackedEncoderOffset, encoderWeightSize) 186 | stackedEncoderOffset += encoderWeightSize 187 | // input data for the next autoencoder in the stack 188 | if (!isLastLayer) { // intermediate layers 189 | val encoderTopology = FeedForwardTopology.multiLayerPerceptron(currentLayers.init, false) 190 | // Due to Vector inefficiency it will copy weights 191 | val encoderModel = encoderTopology.model( 192 | Vectors.fromBreeze(new BDV[Double](currentWeights, 0, 1, encoderWeightSize))) 193 | // TODO: perform block operations 194 | previousData = data 195 | data = data.map { 
x => 196 | val y = encoderModel.predict(x._1) 197 | (y, y) 198 | } 199 | // persist and materialize the intermediate data 200 | data.persist(storageLevel) 201 | data.count() 202 | // unpersist the data that is persisted inside the loop 203 | if (!isFirstLayer) previousData.unpersist() 204 | } else { // last layer 205 | // unpersist the data that remains from the last intermediate layer 206 | if (!isFirstLayer) data.unpersist() 207 | } 208 | // if needs decoder 209 | if ($(buildDecoder)) { 210 | val decoderWeightSize = currentWeights.length - encoderWeightSize 211 | stackedDecoderOffset -= decoderWeightSize 212 | System.arraycopy(currentWeights, encoderWeightSize, stackedDecoderWeights, 213 | stackedDecoderOffset, decoderWeightSize) 214 | } 215 | } 216 | new StackedAutoencoderModel(uid + "model", $(layers), Vectors.dense(stackedEncoderWeights), 217 | Vectors.dense(stackedDecoderWeights), linearInput) 218 | } 219 | 220 | override def copy(extra: ParamMap): Estimator[StackedAutoencoderModel] = defaultCopy(extra) 221 | 222 | /** 223 | * :: DeveloperApi :: 224 | * 225 | * Derives the output schema from the input schema. 226 | */ 227 | override def transformSchema(schema: StructType): StructType = { 228 | val inputType = schema($(inputCol)).dataType 229 | require(inputType.isInstanceOf[VectorUDT], 230 | s"Input column ${$(inputCol)} must be a vector column") 231 | require(!schema.fieldNames.contains($(outputCol)), 232 | s"Output column ${$(outputCol)} already exists.") 233 | val outputFields = schema.fields :+ StructField($(outputCol), new VectorUDT, false) 234 | StructType(outputFields) 235 | } 236 | } 237 | 238 | class StackedAutoencoderModel private[ml] ( 239 | override val uid: String, 240 | val layers: Array[Int], 241 | val encoderWeights: Vector, 242 | val decoderWeights: Vector, 243 | linearOutput: Boolean) extends Model[StackedAutoencoderModel] with StackedAutoencoderParams { 244 | 245 | /** @group setParam */ 246 | def setInputCol(value: String): this.type = set(inputCol, value) 247 | 248 | /** @group setParam */ 249 | def setOutputCol(value: String): this.type = set(outputCol, value) 250 | 251 | private val encoderModel = { 252 | val topology = FeedForwardTopology.multiLayerPerceptron(layers, false) 253 | topology.model(encoderWeights) 254 | } 255 | 256 | private val decoderModel = { 257 | if (decoderWeights != null && decoderWeights.size > 0) { 258 | val topology = FeedForwardTopology.multiLayerPerceptron(layers.reverse, false) 259 | if (linearOutput) { 260 | topology.layers(topology.layers.length - 1) = new EmptyLayerWithSquaredError() 261 | } 262 | topology.model(decoderWeights) 263 | } else { 264 | null 265 | } 266 | } 267 | 268 | override def copy(extra: ParamMap): StackedAutoencoderModel = { 269 | copyValues( 270 | new StackedAutoencoderModel(uid, layers, encoderWeights, decoderWeights, linearOutput), extra) 271 | } 272 | 273 | /** 274 | * Transforms the input dataset. 
275 | */ 276 | override def transform(dataset: Dataset[_]): DataFrame = { 277 | transformSchema(dataset.schema, logging = true) 278 | val pcaOp = udf { encoderModel.predict _ } 279 | dataset.withColumn($(outputCol), pcaOp(col($(inputCol)))) 280 | } 281 | 282 | def encode(dataset: DataFrame): DataFrame = transform(dataset) 283 | 284 | def decode(dataset: DataFrame): DataFrame = { 285 | // TODO: show something if no decoder 286 | transformSchema(dataset.schema, logging = true) 287 | val pcaOp = udf { decoderModel.predict _ } 288 | dataset.withColumn($(outputCol), pcaOp(col($(inputCol)))) 289 | } 290 | 291 | /** 292 | * :: DeveloperApi :: 293 | * 294 | * Derives the output schema from the input schema. 295 | */ 296 | override def transformSchema(schema: StructType): StructType = { 297 | val inputType = schema($(inputCol)).dataType 298 | require(inputType.isInstanceOf[VectorUDT], 299 | s"Input column ${$(inputCol)} must be a vector column") 300 | require(!schema.fieldNames.contains($(outputCol)), 301 | s"Output column ${$(outputCol)} already exists.") 302 | val outputFields = schema.fields :+ StructField($(outputCol), new VectorUDT, false) 303 | StructType(outputFields) 304 | } 305 | } 306 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/examples/MnistClassification.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.examples 19 | 20 | import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator 21 | import org.apache.spark.ml.scaladl.MultilayerPerceptronClassifier 22 | import org.apache.spark.sql.SparkSession 23 | 24 | object MnistClassification { 25 | 26 | def main(args: Array[String]): Unit = { 27 | if (args.length != 1) { 28 | System.exit(0) 29 | } 30 | val mnistPath = args(0) 31 | val spark = SparkSession.builder 32 | .appName("my-spark-app") 33 | .config("spark.sql.warehouse.dir", "warehouse-temp") 34 | .getOrCreate() 35 | val mnistTrain = mnistPath + "/mnist.scale" 36 | val mnistTest = mnistPath + "/mnist.scale.t" 37 | // Load the data stored in LIBSVM format as a DataFrame. 
38 | // MNIST handwritten recognition data 39 | // https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html 40 | val train = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTrain).persist() 41 | val test = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTest).persist() 42 | // materialize data lazily persisted in memory 43 | train.count() 44 | test.count() 45 | // specify layers for the neural network: 46 | // input layer of size 784 (features), one hidden layer of size 100 47 | // and output of size 10 (classes) 48 | val layers = Array[Int](784, 32, 10) 49 | // create the trainer and set its parameters 50 | val trainer = new MultilayerPerceptronClassifier() 51 | .setLayers(layers) 52 | .setBlockSize(128) 53 | .setSeed(1234L) 54 | .setMaxIter(100) 55 | // train the model 56 | val model = trainer.fit(train) 57 | // compute accuracy on the test set 58 | val result = model.transform(test) 59 | val predictionAndLabels = result.select("prediction", "label") 60 | val evaluator = new MulticlassClassificationEvaluator() 61 | .setMetricName("accuracy") 62 | // scalastyle:off 63 | println("Accuracy: " + evaluator.evaluate(predictionAndLabels)) 64 | // scalastyle:on 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/examples/MnistEncoding.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.examples 19 | 20 | import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator 21 | import org.apache.spark.ml.scaladl.{MultilayerPerceptronClassifier, StackedAutoencoder} 22 | import org.apache.spark.sql.SparkSession 23 | 24 | object MnistEncoding { 25 | 26 | def main(args: Array[String]): Unit = { 27 | if (args.length != 1) { 28 | System.exit(0) 29 | } 30 | val mnistPath = args(0) 31 | val spark = SparkSession.builder 32 | .appName("my-spark-app") 33 | .config("spark.sql.warehouse.dir", "warehouse-temp") 34 | .getOrCreate() 35 | val mnistTrain = mnistPath + "/mnist.scale" 36 | val mnistTest = mnistPath + "/mnist.scale.t" 37 | // Load the data stored in LIBSVM format as a DataFrame. 
38 | // MNIST handwritten recognition data 39 | // https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html 40 | val train = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTrain).persist() 41 | val test = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTest).persist() 42 | // materialize data lazily persisted in memory 43 | train.count() 44 | test.count() 45 | // specify layers for the neural network: 46 | // input layer of size 784 (features), one hidden layer of size 100 47 | // and output of size 10 (classes) 48 | val layers = Array[Int](784, 32, 10) 49 | // create autoencoder and decode with one hidden layer of 32 neurons 50 | val stackedAutoencoder = new StackedAutoencoder() 51 | .setLayers(layers.init) 52 | .setBlockSize(128) 53 | .setMaxIter(1) 54 | .setSeed(333L) 55 | .setTol(1e-6) 56 | .setInputCol("features") 57 | .setOutputCol("output") 58 | .setDataIn01Interval(true) 59 | .setBuildDecoder(false) 60 | val saModel = stackedAutoencoder.fit(train) 61 | val autoWeights = saModel.encoderWeights 62 | val trainer = new MultilayerPerceptronClassifier() 63 | .setLayers(layers) 64 | .setBlockSize(128) 65 | .setSeed(123456789L) 66 | .setMaxIter(1) 67 | .setTol(1e-6) 68 | val initialWeights = trainer.fit(train).weights 69 | System.arraycopy( 70 | autoWeights.toArray, 0, initialWeights.toArray, 0, autoWeights.toArray.length) 71 | trainer 72 | .setInitialWeights(initialWeights) 73 | .setMaxIter(10) 74 | .setTol(1e-6) 75 | val model = trainer.fit(train) 76 | val result = model.transform(test) 77 | val predictionAndLabels = result.select("prediction", "label") 78 | val evaluator = new MulticlassClassificationEvaluator() 79 | .setMetricName("accuracy") 80 | // scalastyle:off 81 | println("Accuracy: " + evaluator.evaluate(predictionAndLabels)) 82 | // scalastyle:on 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/layers/Layer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.layers 19 | 20 | import java.util.Random 21 | 22 | import org.apache.spark.ml.linalg.{Vector, Vectors} 23 | import org.apache.spark.rdd.RDD 24 | 25 | import scaladl.layers.AnnTypes._ 26 | import scaladl.optimization._ 27 | import scaladl.tensor.DenseTensor 28 | 29 | object AnnTypes { 30 | type Tensor = DenseTensor[Double] 31 | } 32 | 33 | /** 34 | * Trait that holds Layer properties, that are needed to instantiate it. 35 | * Implements Layer instantiation. 
36 | * 37 | */ 38 | private[layers] trait Layer extends Serializable { 39 | 40 | /** 41 | * Number of weights that is used to allocate memory for the weights vector 42 | */ 43 | val weightSize: Int 44 | 45 | /** 46 | * Returns the output size given the input size (not counting the stack size). 47 | * Output size is used to allocate memory for the output. 48 | * 49 | * @param inputSize input size 50 | * @return output size 51 | */ 52 | def outputSize(inputSize: Int): Int 53 | 54 | /** 55 | * If true, the memory is not allocated for the output of this layer. 56 | * The memory allocated to the previous layer is used to write the output of this layer. 57 | * Developer can set this to true if computing delta of a previous layer 58 | * does not involve its output, so the current layer can write there. 59 | * This also mean that both layers have the same number of outputs. 60 | */ 61 | val inPlace: Boolean 62 | 63 | /** 64 | * Returns the instance of the layer based on weights provided. 65 | * Size of weights must be equal to weightSize 66 | * 67 | * @param weights vector with layer weights 68 | * @return the layer model 69 | */ 70 | def model(weights: Tensor): LayerModel 71 | /** 72 | * Returns the instance of the layer with random generated weights 73 | * 74 | * @param weights vector for weights initialization, must be equal to weightSize 75 | * @param random random number generator 76 | * @return the layer model 77 | */ 78 | def initModel(weights: Tensor, random: Random): LayerModel 79 | } 80 | 81 | /** 82 | * Trait that holds Layer weights (or parameters). 83 | * Implements functions needed for forward propagation, computing delta and gradient. 84 | * Can return weights in Vector format. 85 | */ 86 | private[layers] trait LayerModel extends Serializable { 87 | 88 | val weights: Tensor 89 | /** 90 | * Evaluates the data (process the data through the layer) 91 | * 92 | * @param data data 93 | * @param output output to write to 94 | */ 95 | def eval(data: Tensor, output: Tensor): Unit 96 | 97 | /** 98 | * Computes the delta for back propagation 99 | * 100 | * @param delta delta of this layer 101 | * @param output output of this layer 102 | * @param pDelta storage for the result, the previous delta 103 | * @return delta 104 | */ 105 | def prevDelta(delta: Tensor, output: Tensor, pDelta: Tensor): Unit 106 | 107 | /** 108 | * Computes the gradient 109 | * 110 | * @param delta delta for this layer 111 | * @param input input data 112 | * @param cumGrad cumulative gradient 113 | * @return gradient 114 | */ 115 | def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit 116 | } 117 | 118 | /** 119 | * Layer properties of affine transformations, that is y=A*x+b 120 | * 121 | * @param numIn number of inputs 122 | * @param numOut number of outputs 123 | */ 124 | private[layers] class AffineLayer(val numIn: Int, val numOut: Int) extends Layer { 125 | 126 | override val weightSize = numIn * numOut + numOut 127 | 128 | override def outputSize(inputSize: Int): Int = numOut 129 | 130 | override val inPlace = false 131 | 132 | override def model(weights: Tensor): LayerModel = new AffineLayerModel(weights, this) 133 | 134 | override def initModel(weights: Tensor, random: Random): LayerModel = 135 | AffineLayerModel(this, weights, random) 136 | } 137 | 138 | /** 139 | * Model of Affine layer 140 | * 141 | * @param weights weights 142 | * @param layer layer properties 143 | */ 144 | private[layers] class AffineLayerModel private[layers]( 145 | val weights: Tensor, 146 | val layer: AffineLayer) extends 
LayerModel { 147 | val w = DenseTensor(weights.data, Array(layer.numOut, layer.numIn), weights.offset) 148 | val b = DenseTensor(weights.data, Array(layer.numOut), 149 | weights.offset + (layer.numOut * layer.numIn)) 150 | 151 | private var ones: Tensor = null 152 | 153 | override def eval(data: Tensor, output: Tensor): Unit = { 154 | output.fillWith(b) 155 | DenseTensor.gemm(1.0, w, data, 1.0, output) 156 | } 157 | 158 | override def prevDelta(nextDelta: Tensor, input: Tensor, delta: Tensor): Unit = { 159 | DenseTensor.gemm(1.0, w.transpose, nextDelta, 0.0, delta) 160 | } 161 | 162 | override def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit = { 163 | // compute gradient of weights 164 | val cumGradientOfWeights = DenseTensor(cumGrad.data, w.shape, cumGrad.offset) 165 | DenseTensor.gemm(1.0 / input.shape(1), delta, input.transpose, 1.0, cumGradientOfWeights) 166 | if (ones == null || ones.shape(0) != delta.shape(1)) ones = 167 | DenseTensor.fill(Array(delta.shape(1)))(1) 168 | 169 | // compute gradient of bias 170 | val cumGradientOfBias = DenseTensor(cumGrad.data, Array(b.shape(0)), cumGrad.offset + w.size) 171 | DenseTensor.gemv(1.0 / input.shape(1), delta, ones, 1.0, cumGradientOfBias) 172 | } 173 | } 174 | 175 | /** 176 | * Fabric for Affine layer models 177 | */ 178 | private[layers] object AffineLayerModel { 179 | 180 | /** 181 | * Creates a model of Affine layer 182 | * 183 | * @param layer layer properties 184 | * @param weights vector for weights initialization 185 | * @param random random number generator 186 | * @return model of Affine layer 187 | */ 188 | def apply(layer: AffineLayer, weights: Tensor, random: Random): AffineLayerModel = { 189 | randomWeights(layer.numIn, layer.numOut, weights, random) 190 | new AffineLayerModel(weights, layer) 191 | } 192 | 193 | /** 194 | * Initialize weights 195 | * 196 | * @param numIn number of inputs 197 | * @param numOut number of outputs 198 | * @param weights vector for weights initialization 199 | * @param random random number generator 200 | */ 201 | def randomWeights( 202 | numIn: Int, 203 | numOut: Int, 204 | weights: Tensor, 205 | random: Random): Unit = { 206 | var i = 0 207 | val sz = weights.size 208 | while (i < sz) { 209 | weights.update(i, (random.nextDouble * 4.8 - 2.4) / numIn) 210 | i += 1 211 | } 212 | } 213 | } 214 | 215 | /** 216 | * Trait for functions and their derivatives for functional layers 217 | */ 218 | private[layers] trait ActivationFunction extends Serializable { 219 | 220 | /** 221 | * Implements a function 222 | */ 223 | def eval: Double => Double 224 | 225 | /** 226 | * Implements a derivative of a function (needed for the back propagation) 227 | */ 228 | def derivative: Double => Double 229 | } 230 | 231 | /** 232 | * Implements Sigmoid activation function 233 | */ 234 | private[layers] class SigmoidFunction extends ActivationFunction { 235 | 236 | override def eval: (Double) => Double = x => 1.0 / (1 + Math.exp(-x)) 237 | 238 | override def derivative: (Double) => Double = z => (1 - z) * z 239 | } 240 | 241 | /** 242 | * Functional layer properties, y = f(x) 243 | * 244 | * @param activationFunction activation function 245 | */ 246 | private[layers] class FunctionalLayer(val activationFunction: ActivationFunction) extends Layer { 247 | 248 | override val weightSize = 0 249 | 250 | override def outputSize(inputSize: Int): Int = inputSize 251 | 252 | override val inPlace = true 253 | 254 | override def model(weights: Tensor): LayerModel = new FunctionalLayerModel(this) 255 | 256 | override def 
initModel(weights: Tensor, random: Random): LayerModel = model(weights) 257 | } 258 | 259 | /** 260 | * Functional layer model. Holds no weights. 261 | * 262 | * @param layer functiona layer 263 | */ 264 | private[layers] class FunctionalLayerModel private[layers](val layer: FunctionalLayer) 265 | extends LayerModel { 266 | 267 | // empty weights 268 | val weights: Tensor = DenseTensor(Array(0)) 269 | 270 | override def eval(data: Tensor, output: Tensor): Unit = { 271 | DenseTensor.applyFunction(data, output, layer.activationFunction.eval) 272 | } 273 | 274 | override def prevDelta(nextDelta: Tensor, input: Tensor, delta: Tensor): Unit = { 275 | DenseTensor.applyFunction(input, delta, layer.activationFunction.derivative) 276 | DenseTensor.elementwiseProduct(delta, nextDelta) 277 | } 278 | 279 | override def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit = {} 280 | } 281 | 282 | /** 283 | * Trait for the artificial neural network (ANN) topology properties 284 | */ 285 | private[layers] trait Topology extends Serializable { 286 | def model(weights: Vector): TopologyModel 287 | def model(seed: Long): TopologyModel 288 | } 289 | 290 | /** 291 | * Trait for ANN topology model 292 | */ 293 | private[layers] trait TopologyModel extends Serializable { 294 | 295 | val weights: Vector 296 | /** 297 | * Array of layers 298 | */ 299 | val layers: Array[Layer] 300 | 301 | /** 302 | * Array of layer models 303 | */ 304 | val layerModels: Array[LayerModel] 305 | /** 306 | * Forward propagation 307 | * 308 | * @param data input data 309 | * @return array of outputs for each of the layers 310 | */ 311 | def forward(data: Tensor): Array[Tensor] 312 | 313 | /** 314 | * Prediction of the model 315 | * 316 | * @param data input data 317 | * @return prediction 318 | */ 319 | def predict(data: Vector): Vector 320 | 321 | /** 322 | * Computes gradient for the network 323 | * 324 | * @param data input data 325 | * @param target target output 326 | * @param cumGradient cumulative gradient 327 | * @param blockSize block size 328 | * @return error 329 | */ 330 | def computeGradient(data: Tensor, target: Tensor, cumGradient: Tensor, 331 | blockSize: Int): Double 332 | } 333 | 334 | /** 335 | * Feed forward ANN 336 | * 337 | * @param layers 338 | */ 339 | private[layers] class FeedForwardTopology private(val layers: Array[Layer]) extends Topology { 340 | override def model(weights: Vector): TopologyModel = FeedForwardModel(this, weights) 341 | 342 | override def model(seed: Long): TopologyModel = FeedForwardModel(this, seed) 343 | } 344 | 345 | /** 346 | * Factory for some of the frequently-used topologies 347 | */ 348 | object FeedForwardTopology { 349 | /** 350 | * Creates a feed forward topology from the array of layers 351 | * 352 | * @param layers array of layers 353 | * @return feed forward topology 354 | */ 355 | def apply(layers: Array[Layer]): FeedForwardTopology = { 356 | new FeedForwardTopology(layers) 357 | } 358 | 359 | /** 360 | * Creates a multi-layer perceptron 361 | * 362 | * @param layerSizes sizes of layers including input and output size 363 | * @param softmaxOnTop wether to use SoftMax or Sigmoid function for an output layer. 
364 | * Softmax is default 365 | * @return multilayer perceptron topology 366 | */ 367 | def multiLayerPerceptron( 368 | layerSizes: Array[Int], 369 | softmaxOnTop: Boolean = true): FeedForwardTopology = { 370 | val layers = new Array[Layer]((layerSizes.length - 1) * 2) 371 | for(i <- 0 until layerSizes.length - 1) { 372 | layers(i * 2) = new AffineLayer(layerSizes(i), layerSizes(i + 1)) 373 | layers(i * 2 + 1) = 374 | if (i == layerSizes.length - 2) { 375 | if (softmaxOnTop) { 376 | new SoftmaxLayerWithCrossEntropyLoss() 377 | } else { 378 | // TODO: squared error is more natural but converges slower 379 | new SigmoidLayerWithSquaredError() 380 | } 381 | } else { 382 | new FunctionalLayer(new SigmoidFunction()) 383 | } 384 | } 385 | FeedForwardTopology(layers) 386 | } 387 | } 388 | 389 | /** 390 | * Model of Feed Forward Neural Network. 391 | * Implements forward, gradient computation and can return weights in vector format. 392 | * 393 | * @param weights network weights 394 | * @param topology network topology 395 | */ 396 | class FeedForwardModel private( 397 | val weights: Vector, 398 | val topology: FeedForwardTopology) extends TopologyModel { 399 | val layers = topology.layers 400 | val layerModels = new Array[LayerModel](layers.length) 401 | private var offset = 0 402 | for (i <- 0 until layers.length) { 403 | layerModels(i) = layers(i).model( 404 | DenseTensor(weights.toArray, Array(layers(i).weightSize), offset)) 405 | offset += layers(i).weightSize 406 | } 407 | private var outputs: Array[Tensor] = null 408 | private var deltas: Array[Tensor] = null 409 | 410 | override def forward(data: Tensor): Array[Tensor] = { 411 | // Initialize output arrays for all layers. Special treatment for InPlace 412 | val currentBatchSize = data.shape(1) 413 | // TODO: allocate outputs as one big array and then create BDMs from it 414 | if (outputs == null || outputs(0).shape(1) != currentBatchSize) { 415 | outputs = new Array[Tensor](layers.length) 416 | var inputSize = data.shape(0) 417 | for (i <- 0 until layers.length) { 418 | if (layers(i).inPlace) { 419 | outputs(i) = outputs(i - 1) 420 | } else { 421 | val outputSize = layers(i).outputSize(inputSize) 422 | outputs(i) = DenseTensor(Array(outputSize, currentBatchSize)) 423 | inputSize = outputSize 424 | } 425 | } 426 | } 427 | layerModels(0).eval(data, outputs(0)) 428 | for (i <- 1 until layerModels.length) { 429 | layerModels(i).eval(outputs(i - 1), outputs(i)) 430 | } 431 | outputs 432 | } 433 | 434 | override def computeGradient( 435 | data: Tensor, 436 | target: Tensor, 437 | cumGradient: Tensor, 438 | realBatchSize: Int): Double = { 439 | val outputs = forward(data) 440 | val currentBatchSize = data.shape(1) 441 | // TODO: allocate deltas as one big array and then create BDMs from it 442 | if (deltas == null || deltas(0).shape(1) != currentBatchSize) { 443 | deltas = new Array[Tensor](layerModels.length) 444 | var inputSize = data.shape(0) 445 | for (i <- 0 until layerModels.length - 1) { 446 | val outputSize = layers(i).outputSize(inputSize) 447 | deltas(i) = new Tensor(Array(outputSize, currentBatchSize)) 448 | inputSize = outputSize 449 | } 450 | } 451 | val L = layerModels.length - 1 452 | // TODO: explain why delta of top layer is null (because it might contain loss+layer) 453 | val loss = layerModels.last match { 454 | case levelWithError: LossFunction => levelWithError.loss(outputs.last, target, deltas(L - 1)) 455 | case _ => 456 | throw new UnsupportedOperationException("Top layer is required to have objective.") 457 | } 458 | for 
(i <- (L - 2) to (0, -1)) { 459 | layerModels(i + 1).prevDelta(deltas(i + 1), outputs(i + 1), deltas(i)) 460 | } 461 | val cumGradientArray = cumGradient.data 462 | var offset = 0 463 | for (i <- 0 until layerModels.length) { 464 | val input = if (i == 0) data else outputs(i - 1) 465 | layerModels(i).grad(deltas(i), input, 466 | new Tensor(cumGradientArray, Array(layers(i).weightSize), offset)) 467 | offset += layers(i).weightSize 468 | } 469 | loss 470 | } 471 | 472 | override def predict(data: Vector): Vector = { 473 | val size = data.size 474 | val result = forward(DenseTensor(data.toArray, Array(size, 1))) 475 | // TODO: check that it was OK not to clone in the previous version 476 | Vectors.dense(result.last.data.clone()) 477 | } 478 | } 479 | 480 | /** 481 | * Fabric for feed forward ANN models 482 | */ 483 | private[layers] object FeedForwardModel { 484 | 485 | /** 486 | * Creates a model from a topology and weights 487 | * 488 | * @param topology topology 489 | * @param weights weights 490 | * @return model 491 | */ 492 | def apply(topology: FeedForwardTopology, weights: Vector): FeedForwardModel = { 493 | // TODO: check that weights size is equal to sum of layers sizes 494 | new FeedForwardModel(weights, topology) 495 | } 496 | 497 | /** 498 | * Creates a model given a topology and seed 499 | * 500 | * @param topology topology 501 | * @param seed seed for generating the weights 502 | * @return model 503 | */ 504 | def apply(topology: FeedForwardTopology, seed: Long = 11L): FeedForwardModel = { 505 | val layers = topology.layers 506 | val layerModels = new Array[LayerModel](layers.length) 507 | var totalSize = 0 508 | for (i <- 0 until topology.layers.length) { 509 | totalSize += topology.layers(i).weightSize 510 | } 511 | val weights: Tensor = DenseTensor(Array(totalSize)) 512 | var offset = 0 513 | // TODO: check if we can re-use XORShiftRandom 514 | val random = new Random(seed) 515 | for(i <- 0 until layers.length) { 516 | layerModels(i) = layers(i). 517 | initModel(DenseTensor(weights.data, Array(layers(i).weightSize), offset), random) 518 | offset += layers(i).weightSize 519 | } 520 | new FeedForwardModel(Vectors.dense(weights.data), topology) 521 | } 522 | } 523 | 524 | /** 525 | * Neural network gradient. Does nothing but calling Model's gradient 526 | * 527 | * @param topology topology 528 | * @param dataStacker data stacker 529 | */ 530 | private[layers] class ANNGradient(topology: Topology, dataStacker: DataStacker) extends Gradient { 531 | 532 | override def compute(data: Vector, label: Double, weights: Tensor): (Tensor, Double) = { 533 | val gradient = new Tensor(Array(weights.size)) 534 | val loss = compute(data, label, weights, gradient) 535 | (gradient, loss) 536 | } 537 | 538 | override def compute( 539 | data: Vector, 540 | label: Double, 541 | weights: Tensor, 542 | cumGradient: Tensor): Double = { 543 | val (input, target, realBatchSize) = dataStacker.unstack(data) 544 | val model = topology.model(Vectors.dense(weights.data)) 545 | model.computeGradient(input, target, cumGradient, realBatchSize) 546 | } 547 | } 548 | 549 | /** 550 | * Stacks pairs of training samples (input, output) in one vector allowing them to pass 551 | * through Optimizer/Gradient interfaces. If stackSize is more than one, makes blocks 552 | * or matrices of inputs and outputs and then stack them in one vector. 553 | * This can be used for further batch computations after unstacking. 
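 *
 * For example, with stackSize = 2, inputSize = 3 and outputSize = 1, two samples are
 * packed into one vector of length 2 * 3 + 2 * 1 = 8: both inputs first, then both
 * targets. Unstacking recovers the batch matrices with one sample per column
 * (illustrative sketch):
 * {{{
 *   val stacker = new DataStacker(2, 3, 1)
 *   val (input, target, realBatchSize) =
 *     stacker.unstack(Vectors.dense(Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.0, 1.0)))
 *   // input has shape (3, 2), target has shape (1, 2), realBatchSize == 2
 * }}}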
554 | * 555 | * @param stackSize stack size 556 | * @param inputSize size of the input vectors 557 | * @param outputSize size of the output vectors 558 | */ 559 | private[layers] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int) 560 | extends Serializable { 561 | 562 | /** 563 | * Stacks the data 564 | * 565 | * @param data RDD of vector pairs 566 | * @return RDD of double (always zero) and vector that contains the stacked vectors 567 | */ 568 | def stack(data: RDD[(Vector, Vector)]): RDD[(Double, Vector)] = { 569 | val stackedData = if (stackSize == 1) { 570 | data.map { v => 571 | val bigVector = new Array[Double](v._1.size + v._2.size) 572 | System.arraycopy(v._1.toArray, 0, bigVector, 0, v._1.size) 573 | System.arraycopy(v._2.toArray, 0, bigVector, v._1.size, v._2.size) 574 | (0.0, Vectors.dense(bigVector)) 575 | } 576 | } else { 577 | data.mapPartitions { it => 578 | it.grouped(stackSize).map { seq => 579 | val size = seq.size 580 | val bigVector = new Array[Double](inputSize * size + outputSize * size) 581 | var i = 0 582 | seq.foreach { case (in, out) => 583 | System.arraycopy(in.toArray, 0, bigVector, i * inputSize, inputSize) 584 | System.arraycopy(out.toArray, 0, bigVector, 585 | inputSize * size + i * outputSize, outputSize) 586 | i += 1 587 | } 588 | (0.0, Vectors.dense(bigVector)) 589 | } 590 | } 591 | } 592 | stackedData 593 | } 594 | 595 | /** 596 | * Unstack the stacked vectors into matrices for batch operations 597 | * 598 | * @param data stacked vector 599 | * @return pair of matrices holding input and output data and the real stack size 600 | */ 601 | def unstack(data: Vector): (Tensor, Tensor, Int) = { 602 | val arrData = data.toArray 603 | val realStackSize = arrData.length / (inputSize + outputSize) 604 | val input = DenseTensor(arrData, Array(inputSize, realStackSize)) 605 | val target = DenseTensor(arrData, Array(outputSize, realStackSize), inputSize * realStackSize) 606 | (input, target, realStackSize) 607 | } 608 | } 609 | 610 | /** 611 | * Simple updater 612 | */ 613 | private[layers] class ANNUpdater extends Updater { 614 | 615 | override def compute( 616 | weightsOld: Tensor, 617 | gradient: Tensor, 618 | stepSize: Double, 619 | iter: Int, 620 | regParam: Double): (Tensor, Double) = { 621 | val thisIterStepSize = stepSize 622 | DenseTensor.axpy(-thisIterStepSize, gradient, weightsOld) 623 | (weightsOld, 0) 624 | } 625 | } 626 | 627 | /** 628 | * MLlib-style trainer class that trains a network given the data and topology 629 | * 630 | * @param topology topology of ANN 631 | * @param inputSize input size 632 | * @param outputSize output size 633 | */ 634 | class FeedForwardTrainer( 635 | topology: Topology, 636 | val inputSize: Int, 637 | val outputSize: Int) extends Serializable { 638 | 639 | private var _seed = 11L 640 | private var _weights: Vector = null 641 | private var _stackSize = 128 642 | private var dataStacker = new DataStacker(_stackSize, inputSize, outputSize) 643 | private var _gradient: Gradient = new ANNGradient(topology, dataStacker) 644 | private var _updater: Updater = new ANNUpdater() 645 | private var optimizer: Optimizer = LBFGSOptimizer.setConvergenceTol(1e-4).setNumIterations(100) 646 | 647 | /** 648 | * Returns seed 649 | * 650 | * @return seed 651 | */ 652 | def getSeed: Long = _seed 653 | 654 | /** 655 | * Sets seed 656 | * 657 | * @param value seed 658 | * @return trainer 659 | */ 660 | def setSeed(value: Long): FeedForwardTrainer = { 661 | _seed = value 662 | this 663 | } 664 | 665 | /** 666 | * Returns weights 667 | 
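 * (null unless weights have been set explicitly with setWeights)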
* 668 | * @return weights 669 | */ 670 | def getWeights: Vector = _weights 671 | 672 | /** 673 | * Sets weights 674 | * 675 | * @param value weights 676 | * @return trainer 677 | */ 678 | def setWeights(value: Vector): FeedForwardTrainer = { 679 | _weights = value 680 | this 681 | } 682 | 683 | /** 684 | * Sets the stack size 685 | * 686 | * @param value stack size 687 | * @return trainer 688 | */ 689 | def setStackSize(value: Int): FeedForwardTrainer = { 690 | _stackSize = value 691 | dataStacker = new DataStacker(value, inputSize, outputSize) 692 | this 693 | } 694 | 695 | /** 696 | * Sets the SGD optimizer 697 | * 698 | * @return SGD optimizer 699 | */ 700 | def SGDOptimizer: GradientDescent = { 701 | val sgd = new GradientDescent(_gradient, _updater) 702 | optimizer = sgd 703 | sgd 704 | } 705 | 706 | /** 707 | * Sets the LBFGS optimizer 708 | * 709 | * @return LBGS optimizer 710 | */ 711 | def LBFGSOptimizer: LBFGS = { 712 | val lbfgs = new LBFGS(_gradient, _updater) 713 | optimizer = lbfgs 714 | lbfgs 715 | } 716 | 717 | /** 718 | * Sets the updater 719 | * 720 | * @param value updater 721 | * @return trainer 722 | */ 723 | def setUpdater(value: Updater): FeedForwardTrainer = { 724 | _updater = value 725 | updateUpdater(value) 726 | this 727 | } 728 | 729 | /** 730 | * Sets the gradient 731 | * 732 | * @param value gradient 733 | * @return trainer 734 | */ 735 | def setGradient(value: Gradient): FeedForwardTrainer = { 736 | _gradient = value 737 | updateGradient(value) 738 | this 739 | } 740 | 741 | private[this] def updateGradient(gradient: Gradient): Unit = { 742 | optimizer match { 743 | case lbfgs: LBFGS => lbfgs.setGradient(gradient) 744 | case sgd: GradientDescent => sgd.setGradient(gradient) 745 | case other => throw new UnsupportedOperationException( 746 | s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") 747 | } 748 | } 749 | 750 | private[this] def updateUpdater(updater: Updater): Unit = { 751 | optimizer match { 752 | case lbfgs: LBFGS => lbfgs.setUpdater(updater) 753 | case sgd: GradientDescent => sgd.setUpdater(updater) 754 | case other => throw new UnsupportedOperationException( 755 | s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") 756 | } 757 | } 758 | 759 | /** 760 | * Trains the ANN 761 | * 762 | * @param data RDD of input and output vector pairs 763 | * @return model 764 | */ 765 | def train(data: RDD[(Vector, Vector)]): TopologyModel = { 766 | val w = if (getWeights == null) { 767 | // TODO: will make a copy if vector is a subvector of BDV (see Vectors code) 768 | topology.model(_seed).weights 769 | } else { 770 | getWeights 771 | } 772 | // TODO: deprecate standard optimizer because it needs Vector 773 | val newWeights = optimizer.optimize(dataStacker.stack(data), 774 | new Tensor(w.toArray, Array(w.size), 0)) 775 | topology.model(Vectors.dense(newWeights.data)) 776 | } 777 | } 778 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/layers/LossFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.layers 19 | 20 | import java.util.Random 21 | 22 | import scaladl.layers.AnnTypes._ 23 | import scaladl.tensor.DenseTensor 24 | 25 | /** 26 | * Trait for loss function 27 | */ 28 | private[layers] trait LossFunction { 29 | /** 30 | * Loss function 31 | * 32 | * @param output actual output 33 | * @param target target output 34 | * @param delta output delta to write to 35 | * @return 36 | */ 37 | def loss(output: Tensor, target: Tensor, delta: Tensor): Double 38 | } 39 | 40 | class SigmoidLayerWithSquaredError extends Layer { 41 | override val weightSize = 0 42 | override def outputSize(inputSize: Int): Int = inputSize 43 | override val inPlace = true 44 | override def model(weights: Tensor): LayerModel = new SigmoidLayerModelWithSquaredError() 45 | override def initModel(weights: Tensor, random: Random): LayerModel = 46 | new SigmoidLayerModelWithSquaredError() 47 | } 48 | 49 | private[layers] class SigmoidLayerModelWithSquaredError 50 | extends FunctionalLayerModel(new FunctionalLayer(new SigmoidFunction)) with LossFunction { 51 | override def loss(output: Tensor, target: Tensor, delta: Tensor): Double = { 52 | DenseTensor.applyFunction(output, target, delta, (o: Double, t: Double) => o - t) 53 | val error = (delta :* delta).sum / 2 / output.shape(1) 54 | DenseTensor.applyFunction(delta, output, delta, (x: Double, o: Double) => x * (o - o * o)) 55 | error 56 | } 57 | } 58 | 59 | class SoftmaxLayerWithCrossEntropyLoss extends Layer { 60 | override val weightSize = 0 61 | override def outputSize(inputSize: Int): Int = inputSize 62 | override val inPlace = true 63 | override def model(weights: Tensor): LayerModel = 64 | new SoftmaxLayerModelWithCrossEntropyLoss() 65 | override def initModel(weights: Tensor, random: Random): LayerModel = 66 | new SoftmaxLayerModelWithCrossEntropyLoss() 67 | } 68 | 69 | private[layers] class SoftmaxLayerModelWithCrossEntropyLoss extends LayerModel with LossFunction { 70 | 71 | private val epsilon = 1e-15 72 | private var epsilonMatrix: Tensor = null 73 | 74 | val weights: Tensor = DenseTensor(Array(0)) 75 | 76 | def inplaceEval(x: Tensor, y: Tensor): Unit = { 77 | require(x.shape.length == 2 && y.shape.length == 2 78 | && x.shape(0) == y.shape(0) && x.shape(1) == y.shape(1), 79 | "X and Y must be 2 dim and of equal size") 80 | var j = 0 81 | // find max value to make sure later that exponent is computable 82 | while (j < x.shape(1)) { 83 | var i = 0 84 | var max = Double.MinValue 85 | while (i < x.shape(0)) { 86 | if (x.value(Array(i, j)) > max) { 87 | max = x.value(Array(i, j)) 88 | } 89 | i += 1 90 | } 91 | var sum = 0.0 92 | i = 0 93 | while (i < x.shape(0)) { 94 | val res = Math.exp(x.value(Array(i, j)) - max) 95 | y.update(Array(i, j), res) 96 | sum += res 97 | i += 1 98 | } 99 | i = 0 100 | while (i < x.shape(0)) { 101 | val avg = y.value(Array(i, j)) / sum 102 | y.update(Array(i, j), avg) 103 | i += 1 104 | } 105 | j += 1 106 | } 107 | } 108 | 109 | override def eval(data: Tensor, output: Tensor): Unit = { 110 | inplaceEval(data, output) 111 | } 112 | override def prevDelta(nextDelta: 
Tensor, input: Tensor, delta: Tensor): Unit = {} 113 | 114 | override def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit = {} 115 | 116 | override def loss(output: Tensor, target: Tensor, delta: Tensor): Double = { 117 | if (epsilonMatrix == null || epsilonMatrix.shape(1) != target.shape(1)) { 118 | epsilonMatrix = DenseTensor.fill(target.shape)(epsilon) 119 | } 120 | DenseTensor.applyFunction(output, target, delta, (o: Double, t: Double) => o - t) 121 | val temp = output + epsilonMatrix 122 | DenseTensor.applyFunction(temp, Math.log) 123 | -(target :* temp).sum / output.shape(1) 124 | } 125 | } 126 | 127 | class EmptyLayerWithSquaredError extends Layer { 128 | override val weightSize = 0 129 | override def outputSize(inputSize: Int): Int = inputSize 130 | override val inPlace = true 131 | override def model(weights: Tensor): LayerModel = 132 | new EmptyLayerModelWithSquaredError() 133 | override def initModel(weights: Tensor, random: Random): LayerModel = 134 | new EmptyLayerModelWithSquaredError() 135 | } 136 | 137 | private[layers] class EmptyLayerModelWithSquaredError extends LayerModel with LossFunction { 138 | 139 | val weights: Tensor = DenseTensor(Array(0)) 140 | 141 | override def loss(output: Tensor, target: Tensor, delta: Tensor): Double = { 142 | DenseTensor.applyFunction(output, target, delta, (o: Double, t: Double) => o - t) 143 | (delta :* delta).sum / 2 / output.shape(1) 144 | } 145 | 146 | override def eval(data: Tensor, output: Tensor): Unit = {} 147 | override def prevDelta(nextDelta: Tensor, input: Tensor, delta: Tensor): Unit = {} 148 | override def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit = {} 149 | } 150 | 151 | class SigmoidLayerWithCrossEntropyLoss extends Layer { 152 | override val weightSize = 0 153 | override def outputSize(inputSize: Int): Int = inputSize 154 | override val inPlace = true 155 | override def model(weights: Tensor): LayerModel = 156 | new SigmoidLayerModelWithCrossEntropyLoss() 157 | override def initModel(weights: Tensor, random: Random): LayerModel = 158 | new SigmoidLayerModelWithCrossEntropyLoss() 159 | } 160 | 161 | private[layers] class SigmoidLayerModelWithCrossEntropyLoss 162 | extends FunctionalLayerModel(new FunctionalLayer(new SigmoidFunction)) with LossFunction { 163 | // TODO: make a common place where ones matrices reside 164 | private var oneMatrix: Tensor = null 165 | private val epsilon = 1e-15 166 | private var epsilonMatrix: Tensor = null 167 | 168 | override def loss(output: Tensor, target: Tensor, delta: Tensor): Double = { 169 | if (oneMatrix == null || oneMatrix.shape(1) != target.shape(1)) { 170 | oneMatrix = DenseTensor.fill(target.shape)(1) 171 | } 172 | if (epsilonMatrix == null || epsilonMatrix.shape(1) != target.shape(1)) { 173 | epsilonMatrix = DenseTensor.fill(target.shape)(epsilon) 174 | } 175 | DenseTensor.applyFunction(output, target, delta, (o: Double, t: Double) => o - t) 176 | // NB: operation :* don't have execution priority over summation 177 | // TODO: is adding epsilon a good way to fight log(o) ? 
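    // What follows evaluates the binary cross-entropy
    //   loss = -sum(t * log(o + eps) + (1 - t) * log(1 - o + eps)) / batchSize,
    // where eps keeps log() away from log(0) when the sigmoid output saturates.
    // delta was set above to (o - t), which is the gradient of this loss with respect
    // to the pre-activation once the sigmoid layer is taken into account.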
178 | val temp1 = output + epsilonMatrix; 179 | DenseTensor.applyFunction(temp1, Math.log) 180 | val temp2 = oneMatrix - output + epsilonMatrix 181 | DenseTensor.applyFunction(temp2, Math.log) 182 | -((target :* temp1) + ((oneMatrix - target) :* temp2)).sum / output.shape(1) 183 | } 184 | } 185 | 186 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/Gradient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import org.apache.spark.ml.linalg.Vector 21 | 22 | import scaladl.layers.AnnTypes.Tensor 23 | 24 | /** 25 | * :: DeveloperApi :: 26 | * Class used to compute the gradient for a loss function, given a single data point. 27 | */ 28 | abstract class Gradient extends Serializable { 29 | /** 30 | * Compute the gradient and loss given the features of a single data point. 31 | * 32 | * @param data features for one data point 33 | * @param label label for this data point 34 | * @param weights weights/coefficients corresponding to features 35 | * @return (gradient: Vector, loss: Double) 36 | */ 37 | def compute(data: Vector, label: Double, weights: Tensor): (Tensor, Double) = { 38 | val gradient = new Tensor(Array(weights.size)) 39 | val loss = compute(data, label, weights, gradient) 40 | (gradient, loss) 41 | } 42 | 43 | /** 44 | * Compute the gradient and loss given the features of a single data point, 45 | * add the gradient to a provided vector to avoid creating new objects, and return loss. 46 | * 47 | * @param data features for one data point 48 | * @param label label for this data point 49 | * @param weights weights/coefficients corresponding to features 50 | * @param cumGradient the computed gradient will be added to this vector 51 | * @return loss 52 | */ 53 | def compute(data: Vector, label: Double, weights: Tensor, cumGradient: Tensor): Double 54 | } 55 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/GradientDescent.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import scala.collection.mutable.ArrayBuffer 21 | 22 | import org.apache.log4j.{Level, LogManager} 23 | import org.apache.spark.ml.linalg.Vector 24 | import org.apache.spark.rdd.RDD 25 | 26 | import scaladl.layers.AnnTypes.Tensor 27 | import scaladl.tensor.DenseTensor 28 | 29 | /** 30 | * Class used to solve an optimization problem using Gradient Descent. 31 | * 32 | * @param gradient Gradient function to be used. 33 | * @param updater Updater to be used to update weights after every iteration. 34 | */ 35 | class GradientDescent (private var gradient: Gradient, private var updater: Updater) 36 | extends Optimizer { 37 | 38 | private var stepSize: Double = 1.0 39 | private var numIterations: Int = 100 40 | private var regParam: Double = 0.0 41 | private var miniBatchFraction: Double = 1.0 42 | private var convergenceTol: Double = 0.001 43 | 44 | /** 45 | * Set the initial step size of SGD for the first step. Default 1.0. 46 | * In subsequent steps, the step size will decrease with stepSize/sqrt(t) 47 | */ 48 | def setStepSize(step: Double): this.type = { 49 | this.stepSize = step 50 | this 51 | } 52 | 53 | /** 54 | * Set fraction of data to be used for each SGD iteration. 55 | * Default 1.0 (corresponding to deterministic/classical gradient descent) 56 | */ 57 | def setMiniBatchFraction(fraction: Double): this.type = { 58 | this.miniBatchFraction = fraction 59 | this 60 | } 61 | 62 | /** 63 | * Set the number of iterations for SGD. Default 100. 64 | */ 65 | def setNumIterations(iters: Int): this.type = { 66 | this.numIterations = iters 67 | this 68 | } 69 | 70 | /** 71 | * Set the regularization parameter. Default 0.0. 72 | */ 73 | def setRegParam(regParam: Double): this.type = { 74 | this.regParam = regParam 75 | this 76 | } 77 | 78 | /** 79 | * Set the convergence tolerance. Default 0.001 80 | * convergenceTol is a condition which decides iteration termination. 81 | * The end of iteration is decided based on below logic. 82 | * 83 | * - If the norm of the new solution vector is >1, the diff of solution vectors 84 | * is compared to relative tolerance which means normalizing by the norm of 85 | * the new solution vector. 86 | * - If the norm of the new solution vector is <=1, the diff of solution vectors 87 | * is compared to absolute tolerance which is not normalizing. 88 | * 89 | * Must be between 0.0 and 1.0 inclusively. 90 | */ 91 | def setConvergenceTol(tolerance: Double): this.type = { 92 | require(0.0 <= tolerance && tolerance <= 1.0) 93 | this.convergenceTol = tolerance 94 | this 95 | } 96 | 97 | /** 98 | * Set the gradient function (of the loss function of one single data example) 99 | * to be used for SGD. 100 | */ 101 | def setGradient(gradient: Gradient): this.type = { 102 | this.gradient = gradient 103 | this 104 | } 105 | 106 | 107 | /** 108 | * Set the updater function to actually perform a gradient step in a given direction. 
109 | * The updater is responsible to perform the update from the regularization term as well, 110 | * and therefore determines what kind or regularization is used, if any. 111 | */ 112 | def setUpdater(updater: Updater): this.type = { 113 | this.updater = updater 114 | this 115 | } 116 | 117 | /** 118 | * Runs gradient descent on the given training data. 119 | * 120 | * @param data training data 121 | * @param initialWeights initial weights 122 | * @return solution vector 123 | */ 124 | def optimize(data: RDD[(Double, Vector)], initialWeights: Tensor): Tensor = { 125 | val (weights, _) = GradientDescent.runMiniBatchSGD( 126 | data, 127 | gradient, 128 | updater, 129 | stepSize, 130 | numIterations, 131 | regParam, 132 | miniBatchFraction, 133 | initialWeights, 134 | convergenceTol) 135 | weights 136 | } 137 | 138 | } 139 | 140 | /** 141 | * :: DeveloperApi :: 142 | * Top-level method to run gradient descent. 143 | */ 144 | object GradientDescent { 145 | /** 146 | * Run stochastic gradient descent (SGD) in parallel using mini batches. 147 | * In each iteration, we sample a subset (fraction miniBatchFraction) of the total data 148 | * in order to compute a gradient estimate. 149 | * Sampling, and averaging the subgradients over this subset is performed using one standard 150 | * spark map-reduce in each iteration. 151 | * 152 | * @param data Input data for SGD. RDD of the set of data examples, each of 153 | * the form (label, [feature values]). 154 | * @param gradient Gradient object (used to compute the gradient of the loss function of 155 | * one single data example) 156 | * @param updater Updater function to actually perform a gradient step in a given direction. 157 | * @param stepSize initial step size for the first step 158 | * @param numIterations number of iterations that SGD should be run. 159 | * @param regParam regularization parameter 160 | * @param miniBatchFraction fraction of the input data set that should be used for 161 | * one iteration of SGD. Default value 1.0. 162 | * @param convergenceTol Minibatch iteration will end before numIterations if the relative 163 | * difference between the current weight and the previous weight is less 164 | * than this value. In measuring convergence, L2 norm is calculated. 165 | * Default value 0.001. Must be between 0.0 and 1.0 inclusively. 166 | * @return A tuple containing two elements. The first element is a column matrix containing 167 | * weights for every feature, and the second element is an array containing the 168 | * stochastic loss computed for every iteration. 
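 *
 * Typical usage goes through the class-based API rather than calling this method
 * directly. An illustrative sketch (annGradient, annUpdater, stackedData and
 * initialWeights are assumed to have been prepared elsewhere, e.g. by
 * FeedForwardTrainer):
 * {{{
 *   val sgd = new GradientDescent(annGradient, annUpdater)
 *     .setStepSize(0.03)
 *     .setNumIterations(200)
 *     .setMiniBatchFraction(1.0)
 *     .setConvergenceTol(1e-3)
 *   val optimizedWeights = sgd.optimize(stackedData, initialWeights)
 * }}}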
169 | */ 170 | def runMiniBatchSGD( 171 | data: RDD[(Double, Vector)], 172 | gradient: Gradient, 173 | updater: Updater, 174 | stepSize: Double, 175 | numIterations: Int, 176 | regParam: Double, 177 | miniBatchFraction: Double, 178 | initialWeights: Tensor, 179 | convergenceTol: Double): (Tensor, Array[Double]) = { 180 | val log = LogManager.getRootLogger 181 | 182 | def logWarning(msg: => String) { 183 | if (log.isEnabledFor(Level.WARN)) log.warn(msg) 184 | } 185 | def logInfo(msg: => String) { 186 | if (log.isEnabledFor(Level.INFO)) log.info(msg) 187 | } 188 | 189 | 190 | // convergenceTol should be set with non minibatch settings 191 | if (miniBatchFraction < 1.0 && convergenceTol > 0.0) { 192 | logWarning("Testing against a convergenceTol when using miniBatchFraction " + 193 | "< 1.0 can be unstable because of the stochasticity in sampling.") 194 | } 195 | 196 | val stochasticLossHistory = new ArrayBuffer[Double](numIterations) 197 | // Record previous weight and current one to calculate solution vector difference 198 | 199 | var previousWeights: Option[Tensor] = None 200 | var currentWeights: Option[Tensor] = None 201 | 202 | val numExamples = data.count() 203 | 204 | // if no data, return initial weights to avoid NaNs 205 | if (numExamples == 0) { 206 | logWarning("GradientDescent.runMiniBatchSGD returning initial weights, no data found") 207 | return (initialWeights, stochasticLossHistory.toArray) 208 | } 209 | 210 | if (numExamples * miniBatchFraction < 1) { 211 | logWarning("The miniBatchFraction is too small") 212 | } 213 | 214 | // Initialize weights as a column vector 215 | var weights = initialWeights 216 | val n = weights.size 217 | 218 | /** 219 | * For the first iteration, the regVal will be initialized as sum of weight squares 220 | * if it's L2 updater; for L1 updater, the same logic is followed. 221 | */ 222 | var regVal = updater.compute( 223 | weights, new Tensor(Array(weights.size)), 0, 1, regParam)._2 224 | 225 | var converged = false // indicates whether converged based on convergenceTol 226 | var i = 1 227 | while (!converged && i <= numIterations) { 228 | val bcWeights = data.context.broadcast(weights) 229 | // Sample a subset (fraction miniBatchFraction) of the total data 230 | // compute and sum up the subgradients on this subset (this is one map-reduce) 231 | val (gradientSum, lossSum, miniBatchSize) = data.sample(false, miniBatchFraction, 42 + i) 232 | .treeAggregate((new Tensor(Array(n)), 0.0, 0L))( 233 | seqOp = (c, v) => { 234 | // c: (grad, loss, count), v: (label, features) 235 | val l = gradient.compute(v._2, v._1, bcWeights.value, c._1) 236 | (c._1, c._2 + l, c._3 + 1) 237 | }, 238 | combOp = (c1, c2) => { 239 | // c: (grad, loss, count) 240 | DenseTensor.axpy(1, c2._1, c1._1) 241 | (c1._1, c1._2 + c2._2, c1._3 + c2._3) 242 | }) 243 | 244 | if (miniBatchSize > 0) { 245 | /** 246 | * lossSum is computed using the weights from the previous iteration 247 | * and regVal is the regularization value computed in the previous iteration as well. 
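         * The value appended to the history below therefore describes the weights as
         * they were before this iteration's update.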
248 | */ 249 | stochasticLossHistory.append(lossSum / miniBatchSize + regVal) 250 | DenseTensor.scal(miniBatchSize.toDouble, gradientSum) 251 | val update = updater.compute( 252 | weights, gradientSum, 253 | stepSize, i, regParam) 254 | weights = update._1 255 | regVal = update._2 256 | 257 | previousWeights = currentWeights 258 | currentWeights = Some(weights) 259 | if (previousWeights != None && currentWeights != None) { 260 | converged = isConverged(previousWeights.get, 261 | currentWeights.get, convergenceTol) 262 | } 263 | } else { 264 | logWarning(s"Iteration ($i/$numIterations). The size of sampled batch is zero") 265 | } 266 | i += 1 267 | } 268 | 269 | logInfo("GradientDescent.runMiniBatchSGD finished. Last 10 stochastic losses %s".format( 270 | stochasticLossHistory.takeRight(10).mkString(", "))) 271 | 272 | (weights, stochasticLossHistory.toArray) 273 | 274 | } 275 | 276 | 277 | /** 278 | * Alias of [[runMiniBatchSGD]] with convergenceTol set to default value of 0.001. 279 | */ 280 | def runMiniBatchSGD( 281 | data: RDD[(Double, Vector)], 282 | gradient: Gradient, 283 | updater: Updater, 284 | stepSize: Double, 285 | numIterations: Int, 286 | regParam: Double, 287 | miniBatchFraction: Double, 288 | initialWeights: Tensor): (Tensor, Array[Double]) = 289 | GradientDescent.runMiniBatchSGD(data, gradient, updater, stepSize, numIterations, 290 | regParam, miniBatchFraction, initialWeights, 0.001) 291 | 292 | 293 | private def isConverged( 294 | previousWeights: Tensor, 295 | currentWeights: Tensor, 296 | convergenceTol: Double): Boolean = { 297 | // This represents the difference of updated weights in the iteration. 298 | val solutionVecDiff: Double = (previousWeights - currentWeights).norm 299 | 300 | solutionVecDiff < convergenceTol * Math.max(currentWeights.norm, 1.0) 301 | } 302 | 303 | } 304 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/LBFGS.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import scala.collection.mutable 21 | 22 | import breeze.linalg.{DenseVector => BDV} 23 | import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS} 24 | import org.apache.log4j.{Level, LogManager} 25 | import org.apache.spark.ml.linalg.Vector 26 | import org.apache.spark.rdd.RDD 27 | 28 | import scaladl.layers.AnnTypes.Tensor 29 | import scaladl.tensor.DenseTensor 30 | 31 | 32 | class LBFGS(private var gradient: Gradient, private var updater: Updater) 33 | extends Optimizer { 34 | 35 | private var numCorrections = 10 36 | private var convergenceTol = 1E-6 37 | private var maxNumIterations = 100 38 | private var regParam = 0.0 39 | 40 | /** 41 | * Set the number of corrections used in the LBFGS update. Default 10. 42 | * Values of numCorrections less than 3 are not recommended; large values 43 | * of numCorrections will result in excessive computing time. 44 | * 3 < numCorrections < 10 is recommended. 45 | * Restriction: numCorrections > 0 46 | */ 47 | def setNumCorrections(corrections: Int): this.type = { 48 | assert(corrections > 0) 49 | this.numCorrections = corrections 50 | this 51 | } 52 | 53 | /** 54 | * Set the convergence tolerance of iterations for L-BFGS. Default 1E-6. 55 | * Smaller value will lead to higher accuracy with the cost of more iterations. 56 | * This value must be nonnegative. Lower convergence values are less tolerant 57 | * and therefore generally cause more iterations to be run. 58 | */ 59 | def setConvergenceTol(tolerance: Double): this.type = { 60 | this.convergenceTol = tolerance 61 | this 62 | } 63 | 64 | /* 65 | * Get the convergence tolerance of iterations. 66 | */ 67 | def getConvergenceTol(): Double = { 68 | this.convergenceTol 69 | } 70 | 71 | /** 72 | * Set the maximal number of iterations for L-BFGS. Default 100. 73 | * 74 | * @deprecated use [[LBFGS#setNumIterations]] instead 75 | */ 76 | @deprecated("use setNumIterations instead", "1.1.0") 77 | def setMaxNumIterations(iters: Int): this.type = { 78 | this.setNumIterations(iters) 79 | } 80 | 81 | /** 82 | * Set the maximal number of iterations for L-BFGS. Default 100. 83 | */ 84 | def setNumIterations(iters: Int): this.type = { 85 | this.maxNumIterations = iters 86 | this 87 | } 88 | 89 | /** 90 | * Get the maximum number of iterations for L-BFGS. Defaults to 100. 91 | */ 92 | def getNumIterations(): Int = { 93 | this.maxNumIterations 94 | } 95 | 96 | /** 97 | * Set the regularization parameter. Default 0.0. 98 | */ 99 | def setRegParam(regParam: Double): this.type = { 100 | this.regParam = regParam 101 | this 102 | } 103 | 104 | /** 105 | * Get the regularization parameter. 106 | */ 107 | def getRegParam(): Double = { 108 | this.regParam 109 | } 110 | 111 | /** 112 | * Set the gradient function (of the loss function of one single data example) 113 | * to be used for L-BFGS. 114 | */ 115 | def setGradient(gradient: Gradient): this.type = { 116 | this.gradient = gradient 117 | this 118 | } 119 | 120 | /** 121 | * Set the updater function to actually perform a gradient step in a given direction. 122 | * The updater is responsible to perform the update from the regularization term as well, 123 | * and therefore determines what kind or regularization is used, if any. 124 | */ 125 | def setUpdater(updater: Updater): this.type = { 126 | this.updater = updater 127 | this 128 | } 129 | 130 | /** 131 | * Returns the updater, limited to internal use. 
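 * (optimize() forwards the same updater to runLBFGS, where CostFun uses it to obtain
 * the regularization value and gradient)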
132 | */ 133 | private def getUpdater(): Updater = { 134 | updater 135 | } 136 | 137 | override def optimize(data: RDD[(Double, Vector)], initialWeights: Tensor): Tensor = { 138 | val (weights, _) = LBFGS.runLBFGS( 139 | data, 140 | gradient, 141 | updater, 142 | numCorrections, 143 | convergenceTol, 144 | maxNumIterations, 145 | regParam, 146 | initialWeights) 147 | weights 148 | } 149 | 150 | } 151 | 152 | object LBFGS { 153 | /** 154 | * Run Limited-memory BFGS (L-BFGS) in parallel. 155 | * Averaging the subgradients over different partitions is performed using one standard 156 | * spark map-reduce in each iteration. 157 | * 158 | * @param data - Input data for L-BFGS. RDD of the set of data examples, each of 159 | * the form (label, [feature values]). 160 | * @param gradient - Gradient object (used to compute the gradient of the loss function of 161 | * one single data example) 162 | * @param updater - Updater function to actually perform a gradient step in a given direction. 163 | * @param numCorrections - The number of corrections used in the L-BFGS update. 164 | * @param convergenceTol - The convergence tolerance of iterations for L-BFGS which is must be 165 | * nonnegative. Lower values are less tolerant and therefore generally 166 | * cause more iterations to be run. 167 | * @param maxNumIterations - Maximal number of iterations that L-BFGS can be run. 168 | * @param regParam - Regularization parameter 169 | * @return A tuple containing two elements. The first element is a column matrix containing 170 | * weights for every feature, and the second element is an array containing the loss 171 | * computed for every iteration. 172 | */ 173 | def runLBFGS( 174 | data: RDD[(Double, Vector)], 175 | gradient: Gradient, 176 | updater: Updater, 177 | numCorrections: Int, 178 | convergenceTol: Double, 179 | maxNumIterations: Int, 180 | regParam: Double, 181 | initialWeights: Tensor): (Tensor, Array[Double]) = { 182 | 183 | val log = LogManager.getRootLogger 184 | 185 | def logWarning(msg: => String) { 186 | if (log.isEnabledFor(Level.WARN)) log.warn(msg) 187 | } 188 | def logInfo(msg: => String) { 189 | if (log.isEnabledFor(Level.INFO)) log.info(msg) 190 | } 191 | 192 | val lossHistory = mutable.ArrayBuilder.make[Double] 193 | 194 | val numExamples = data.count() 195 | 196 | val costFun = 197 | new CostFun(data, gradient, updater, regParam, numExamples) 198 | 199 | val lbfgs = new BreezeLBFGS[BDV[Double]](maxNumIterations, numCorrections, convergenceTol) 200 | 201 | val initialWeightsBrz = BDV[Double](initialWeights.data) 202 | 203 | val states = 204 | lbfgs.iterations(new CachedDiffFunction(costFun), initialWeightsBrz) 205 | 206 | /** 207 | * NOTE: lossSum and loss is computed using the weights from the previous iteration 208 | * and regVal is the regularization value computed in the previous iteration as well. 209 | */ 210 | var state = states.next() 211 | while (states.hasNext) { 212 | lossHistory += state.value 213 | state = states.next() 214 | } 215 | lossHistory += state.value 216 | val weights = new Tensor(state.x.data, Array(state.x.data.length), 0) 217 | 218 | val lossHistoryArray = lossHistory.result() 219 | 220 | logInfo("LBFGS.runLBFGS finished. Last 10 losses %s".format( 221 | lossHistoryArray.takeRight(10).mkString(", "))) 222 | 223 | (weights, lossHistoryArray) 224 | } 225 | 226 | /** 227 | * CostFun implements Breeze's DiffFunction[T], which returns the loss and gradient 228 | * at a particular point (weights). It's used in Breeze's convex optimization routines. 
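 *
 * calculate() broadcasts the current weights, sums the per-example losses and gradients
 * with treeAggregate, and then adds the regularization value and gradient obtained from
 * the Updater, so the value handed back to Breeze is lossSum / numExamples + regVal.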
229 | */ 230 | private class CostFun( 231 | data: RDD[(Double, Vector)], 232 | gradient: Gradient, 233 | updater: Updater, 234 | regParam: Double, 235 | numExamples: Long) extends DiffFunction[BDV[Double]] { 236 | 237 | override def calculate(weights: BDV[Double]): (Double, BDV[Double]) = { 238 | // Have a local copy to avoid the serialization of CostFun object which is not serializable. 239 | val w = new Tensor(weights.data, Array(weights.data.length), 0) 240 | val n = w.size 241 | val bcW = data.context.broadcast(w) 242 | val localGradient = gradient 243 | 244 | val (gradientSum, lossSum) = data.treeAggregate((new Tensor(Array(n)), 0.0))( 245 | seqOp = (c, v) => (c, v) match { case ((grad, loss), (label, features)) => 246 | val l = localGradient.compute( 247 | features, label, bcW.value, grad) 248 | (grad, loss + l) 249 | }, 250 | combOp = (c1, c2) => (c1, c2) match { case ((grad1, loss1), (grad2, loss2)) => 251 | DenseTensor.axpy(1, grad2, grad1) 252 | (grad1, loss1 + loss2) 253 | }) 254 | 255 | /** 256 | * regVal is sum of weight squares if it's L2 updater; 257 | * for other updater, the same logic is followed. 258 | */ 259 | val regVal = updater.compute(w, new Tensor(Array(n)), 0, 1, regParam)._2 260 | 261 | val loss = lossSum / numExamples + regVal 262 | /** 263 | * It will return the gradient part of regularization using updater. 264 | * 265 | * Given the input parameters, the updater basically does the following, 266 | * 267 | * w' = w - thisIterStepSize * (gradient + regGradient(w)) 268 | * Note that regGradient is function of w 269 | * 270 | * If we set gradient = 0, thisIterStepSize = 1, then 271 | * 272 | * regGradient(w) = w - w' 273 | * 274 | * TODO: We need to clean it up by separating the logic of regularization out 275 | * from updater to regularizer. 276 | */ 277 | // The following gradientTotal is actually the regularization part of gradient. 278 | // Will add the gradientSum computed from the data with weights in the next step. 279 | val gradientTotal = w.copy() 280 | DenseTensor.axpy(-1.0, 281 | updater.compute(w, new Tensor(Array(n)), 1, 1, regParam)._1, gradientTotal) 282 | 283 | // gradientTotal = gradientSum / numExamples + gradientTotal 284 | DenseTensor.axpy(1.0 / numExamples, gradientSum, gradientTotal) 285 | 286 | (loss, new BDV[Double](gradientTotal.data)) 287 | } 288 | } 289 | } 290 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/Optimizer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import org.apache.spark.ml.linalg.Vector 21 | import org.apache.spark.rdd.RDD 22 | 23 | import scaladl.layers.AnnTypes.Tensor 24 | 25 | trait Optimizer extends Serializable { 26 | 27 | /** 28 | * Solve the provided convex optimization problem. 29 | */ 30 | def optimize(data: RDD[(Double, Vector)], initialWeights: Tensor): Tensor 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/Updater.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import scaladl.layers.AnnTypes.Tensor 21 | 22 | abstract class Updater extends Serializable { 23 | /** 24 | * Compute an updated value for weights given the gradient, stepSize, iteration number and 25 | * regularization parameter. Also returns the regularization value regParam * R(w) 26 | * computed using the *updated* weights. 27 | * 28 | * @param weightsOld - Column matrix of size dx1 where d is the number of features. 29 | * @param gradient - Column matrix of size dx1 where d is the number of features. 30 | * @param stepSize - step size across iterations 31 | * @param iter - Iteration number 32 | * @param regParam - Regularization parameter 33 | * @return A tuple of 2 elements. The first element is a column matrix containing updated weights, 34 | * and the second element is the regularization value computed using updated weights. 35 | */ 36 | def compute( 37 | weightsOld: Tensor, 38 | gradient: Tensor, 39 | stepSize: Double, 40 | iter: Int, 41 | regParam: Double): (Tensor, Double) 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/tensor/DenseTensor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.tensor 19 | 20 | import scala.collection.mutable.ArrayBuffer 21 | import scala.reflect.ClassTag 22 | 23 | import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS} 24 | 25 | object Algebra { 26 | trait NumberLike[@specialized (Double, Float) T] extends Serializable { 27 | def plus(x: T, y: T): T 28 | def minus(x: T, y: T): T 29 | def times(x: T, y: T): T 30 | def sqrt(x: T): T 31 | } 32 | object NumberLike { 33 | implicit object NumberLikeDouble extends NumberLike[Double] { 34 | def plus(x: Double, y: Double): Double = x + y 35 | def minus(x: Double, y: Double): Double = x - y 36 | def times(x: Double, y: Double): Double = x * y 37 | def sqrt(x: Double): Double = math.sqrt(x) 38 | } 39 | implicit object NumberLikeFloat extends NumberLike[Float] { 40 | def plus(x: Float, y: Float): Float = x + y 41 | def minus(x: Float, y: Float): Float = x - y 42 | def times(x: Float, y: Float): Float = x * y 43 | def sqrt(x: Float): Float = math.sqrt(x.toDouble).toFloat 44 | } 45 | } 46 | } 47 | import Algebra.NumberLike 48 | 49 | /** 50 | * Dense tensor column-major representation. // TODO: row major?? 51 | * 52 | * @param data underlying data 53 | * @param tensorShape shape of tensor 54 | * @param offset offset in the data 55 | * @tparam T type 56 | */ 57 | class DenseTensor[@specialized(Double, Float) T] ( 58 | val data: Array[T], 59 | val tensorShape: Array[Int], 60 | val offset: Int, 61 | isTransposed: Boolean = false)(implicit numOps: NumberLike[T]) extends Serializable { 62 | 63 | private var actualSize: Int = 0 64 | private var majorStride: Int = 0 65 | private var requiredSize: Int = 0 66 | // Fix of the Scala specialized constructor bug: 67 | // http://axel22.github.io/2013/11/03/specialization-quirks.html 68 | protected def init(data: Array[T], tensorShape: Array[Int]): Unit = { 69 | actualSize = data.length - offset 70 | majorStride = if (isTransposed) tensorShape.last else tensorShape.head 71 | requiredSize = tensorShape.product 72 | } 73 | init(data, tensorShape) 74 | // TODO: figure out which of size, shape etc can be removed or replaced in other functions 75 | // private val actualSize = data.size//data.length - offset 76 | // // Major stride (always the first??? dimension since stored in columnar format) 77 | // private val majorStride = if (isTransposed) tensorShape.last else tensorShape.head 78 | // private val requiredSize = tensorShape.product 79 | require(requiredSize <= actualSize, 80 | "Actual size of the array does not correspond to dimension Sizes") 81 | private var myShape = tensorShape 82 | 83 | /** 84 | * Allocate new tensor 85 | * @param tensorShape tensor shape 86 | * @param m type parameter 87 | * @param numOps ops parameter 88 | */ 89 | def this(tensorShape: Array[Int])(implicit m: ClassTag[T], numOps: NumberLike[T]) = { 90 | this(new Array[T](tensorShape.product), tensorShape, 0) 91 | } 92 | 93 | /** 94 | * New tensor given data and shape 95 | * @param data data array 96 | * @param tensorShape shape 97 | * @param m type 98 | * @param numOps ops 99 | */ 100 | def this(data: Array[T], tensorShape: Array[Int]) 101 | (implicit m: ClassTag[T], numOps: NumberLike[T]) = { 102 | this(data, tensorShape, 0, false) 103 | } 104 | 105 | /** 106 | * Don't use this in loops!!! 
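 * It is recomputed as the product of the current shape on every call, so hoist it into
 * a local val before a tight loop.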
107 | * @return 108 | */ 109 | def size: Int = myShape.product 110 | /** 111 | * Shape of the tensor 112 | * 113 | * @return shape 114 | */ 115 | def shape: Array[Int] = myShape 116 | 117 | /** 118 | * Reshape the tensor. Supports reshaping within the same data size 119 | * 120 | * @param newShape new shape 121 | * @return reshaped tensor backed by the same data 122 | */ 123 | def reshape(newShape: Array[Int]): DenseTensor[T] = { 124 | val requiredSize = newShape.product 125 | require(requiredSize == actualSize) 126 | myShape = newShape 127 | this 128 | } 129 | 130 | /** 131 | * Update value of a Tensor 132 | * 133 | * @param index index 134 | * @param value value 135 | */ 136 | def update(index: Int, value: T): Unit = { 137 | require(index >=0 && index < requiredSize) 138 | data(this.offset + index) = value 139 | } 140 | 141 | /** 142 | * Update value of a Tensor 143 | * 144 | * @param index index 145 | * @param value value 146 | */ 147 | def update(index: Array[Int], value: T): Unit = { 148 | data(offset(index)) = value 149 | } 150 | 151 | /** 152 | * Get the value at position index 153 | * 154 | * @param index index 155 | * @return value 156 | */ 157 | def value(index: Int): T = { 158 | require(index >=0 && index < requiredSize) 159 | data(this.offset + index) 160 | } 161 | 162 | /** 163 | * Get the value at position index 164 | * 165 | * @param index index 166 | * @return value 167 | */ 168 | def value(index: Array[Int]): T = { 169 | data(offset(index)) 170 | } 171 | 172 | private def offset(index: Array[Int]): Int = { 173 | var offset = index.last 174 | for (i <- myShape.length - 1 to 1 by -1) { 175 | offset = index(i - 1) + myShape(i - 1) * offset 176 | } 177 | offset 178 | } 179 | 180 | /** 181 | * Check if tensor is transposed 182 | * 183 | * @return true if transposed, false otherwise 184 | */ 185 | def transposed: Boolean = isTransposed 186 | 187 | /** 188 | * Transpose tensor. Does not actually transpose the data. 189 | * It is used for operations such as gemm. 
190 | * 191 | * @return self 192 | */ 193 | def transpose(implicit m: ClassTag[T]): DenseTensor[T] = { 194 | require(tensorShape.length == 2, "Transpose is valid only for 2 dimensional tensor") 195 | val transposedTensor = DenseTensor[T](data, tensorShape.reverse, offset, true) 196 | transposedTensor 197 | } 198 | 199 | /** 200 | * Slice the tensor by the last dimension 201 | * 202 | * @param from index 203 | * @param until index 204 | * @return tensor backed by the same data 205 | */ 206 | def slice(from: Int, until: Int): DenseTensor[T] = { 207 | require(from < until && from < myShape(0) && until <= myShape(0), 208 | "start and end must be within the size of first dimension, also start <= end") 209 | val shapeInit = myShape.init 210 | val lastDimensionNewSize = until - from 211 | val startOffset = offset(shapeInit.map(_ => 0) :+ from) 212 | new DenseTensor[T](data, shapeInit :+ lastDimensionNewSize, startOffset) 213 | } 214 | 215 | /** 216 | * Slice the tensor by one index in the last dimension 217 | * 218 | * @param index index 219 | * @return squeezed tensor 220 | */ 221 | def slice(index: Int): DenseTensor[T] = { 222 | slice(index, index + 1).squeeze() 223 | } 224 | 225 | /** 226 | * Squeze the dimensions of size 1 227 | * 228 | * @return tensor backed by the same data 229 | */ 230 | def squeeze(): DenseTensor[T] = { 231 | val buf = new ArrayBuffer[Int](myShape.length) 232 | for (dim <- myShape) { 233 | if (dim > 1) buf += dim 234 | } 235 | myShape = buf.toArray 236 | this 237 | } 238 | 239 | /** 240 | * Copy the underlying data 241 | * 242 | * @param m ClassTag 243 | * @return data array 244 | */ 245 | def copyData()(implicit m: ClassTag[T]): Array[T] = { 246 | val array = new Array[T](myShape.product) 247 | System.arraycopy(data, offset, array, 0, array.length) 248 | array 249 | } 250 | 251 | def copy()(implicit m: ClassTag[T]): DenseTensor[T] = { 252 | val array = new Array[T](myShape.product) 253 | System.arraycopy(data, offset, array, 0, array.length) 254 | new DenseTensor(array, myShape, offset, isTransposed) 255 | } 256 | 257 | /** 258 | * Fill tensor with the data from the other tensor 259 | * 260 | * @param donor tensor from which to get data 261 | * @return self 262 | */ 263 | def fillWith(donor: DenseTensor[T]): DenseTensor[T] = { 264 | require(size % donor.size == 0 && size >= donor.size, 265 | "data size of recipient tensor must be >= and divide evenly by the data size of donor tensor") 266 | val donorSize = donor.size 267 | val numCopies = size / donorSize 268 | var k = 0 269 | var nextOffset = 0 270 | while (k < numCopies) { 271 | System.arraycopy(donor.data, donor.offset, this.data, this.offset + nextOffset, donorSize) 272 | nextOffset += donorSize 273 | k += 1 274 | } 275 | this 276 | } 277 | 278 | /** 279 | * Plus operation 280 | * @param other other tensor 281 | * @param m type parameter 282 | * @return returns new tensor 283 | */ 284 | def +(other: DenseTensor[T])(implicit m: ClassTag[T]): DenseTensor[T] = { 285 | require(equalShape(other), "Must be equal shape") 286 | val sz = size 287 | val newData = new Array[T](sz) 288 | var thisIndex = this.offset 289 | var otherIndex = other.offset 290 | var i = 0 291 | while (i < sz) { 292 | newData(i) = numOps.plus(this.data(thisIndex), other.data(otherIndex)) 293 | thisIndex += 1 294 | otherIndex += 1 295 | i += 1 296 | } 297 | DenseTensor(newData, shape.clone()) 298 | } 299 | 300 | /** 301 | * Minus operation 302 | * @param other other tensor 303 | * @param m type parameter 304 | * @return returns new tensor 305 | */ 306 
| def -(other: DenseTensor[T])(implicit m: ClassTag[T]): DenseTensor[T] = { 307 | require(equalShape(other), "Must be equal shape") 308 | val sz = size 309 | val newData = new Array[T](sz) 310 | var i = 0 311 | while (i < sz) { 312 | newData(i) = numOps.minus(this.data(this.offset + i), other.data(other.offset + i)) 313 | i += 1 314 | } 315 | DenseTensor(newData, shape.clone()) 316 | } 317 | 318 | /** 319 | * Elementwise multiplication 320 | * @param other other tensor 321 | * @param m type parameter 322 | * @return returns new tensor 323 | */ 324 | def :*(other: DenseTensor[T])(implicit m: ClassTag[T]): DenseTensor[T] = { 325 | require(equalShape(other), "Must be equal shape") 326 | val sz = size 327 | val newData = new Array[T](sz) 328 | var i = 0 329 | while (i < sz) { 330 | newData(i) = numOps.times(this.data(this.offset + i), other.data(other.offset + i)) 331 | i += 1 332 | } 333 | DenseTensor(newData, shape.clone()) 334 | } 335 | 336 | // TODO: fix this ugliness 337 | private def equalShape(other: DenseTensor[T]): Boolean = { 338 | val thisShape = this.shape 339 | val otherShape = other.shape 340 | if (thisShape.length != otherShape.length) { 341 | return false 342 | } else { 343 | var i = 0 344 | while (i < thisShape.length) { 345 | if (thisShape(i) != otherShape(i)) { 346 | return false 347 | } 348 | i += 1 349 | } 350 | } 351 | true 352 | } 353 | 354 | /** 355 | * Sum of the elements 356 | * @return sum 357 | */ 358 | def sum: T = { 359 | var i = offset 360 | var mySum = numOps.minus(data(i), data(i)) 361 | val max = offset + size 362 | while (i < max) { 363 | mySum = numOps.plus(mySum, data(i)) 364 | i += 1 365 | } 366 | mySum 367 | } 368 | 369 | /** 370 | * Norm of the vector 371 | * @return norm 372 | */ 373 | def norm: T = { 374 | var i = offset 375 | var mySum = numOps.minus(data(i), data(i)) 376 | val max = offset + size 377 | while (i < max) { 378 | mySum = numOps.plus(mySum, numOps.times(data(i), data(i))) 379 | i += 1 380 | } 381 | numOps.sqrt(mySum) 382 | } 383 | 384 | /** 385 | * Equals for transposed, shape and data 386 | * @param other tensor 387 | * @return true if equal, false overwise 388 | */ 389 | def isEqual(other: DenseTensor[T]): Boolean = { 390 | if (this.transposed != other.transposed || !equalShape(other)) { 391 | return false 392 | } else { 393 | var i = 0 394 | while (i < data.length) { 395 | if (data(i) != other.data(i)) { 396 | return false 397 | } 398 | i += 1 399 | } 400 | } 401 | true 402 | } 403 | 404 | override def toString(): String = { 405 | // TODO: implement row-by-row print 406 | val buf = new StringBuilder() 407 | for (i <- offset until offset + size) { 408 | var product: Int = 1 409 | val index = new Array[Int](myShape.length) 410 | for (dim <- 0 until myShape.length - 1) { 411 | val dimValue = (i / product) % myShape(dim) 412 | product *= myShape(dim) 413 | index(dim) = dimValue 414 | } 415 | index(myShape.length - 1) = i / product 416 | buf.append(value(index)) 417 | buf.append(" ") 418 | } 419 | buf.toString() 420 | } 421 | 422 | } 423 | 424 | object DenseTensor { 425 | 426 | /** 427 | * Create a tensor with zeros 428 | * 429 | * @param tensorShape shape 430 | * @param m ClassTag 431 | * @tparam T implicit type 432 | * @return tensor 433 | */ 434 | def apply[@specialized(Double, Float) T](tensorShape: Array[Int]) 435 | (implicit m: ClassTag[T], 436 | numOps: NumberLike[T]): DenseTensor[T] = { 437 | val data: Array[T] = new Array[T](tensorShape.product) 438 | DenseTensor(data, tensorShape) 439 | } 440 | 441 | /** 442 | * Create a tensor 
from data 443 | * 444 | * @param data data 445 | * @param tensorShape shape 446 | * @param offset offset in the data 447 | * @param m ClassTag 448 | * @tparam T implicit type 449 | * @return tensor 450 | */ 451 | def apply[@specialized(Double, Float) T]( 452 | data: Array[T], 453 | tensorShape: Array[Int], 454 | offset: Int = 0, 455 | isTransposed: Boolean = false) 456 | (implicit m: ClassTag[T], numOps: NumberLike[T]): DenseTensor[T] = { 457 | new DenseTensor[T](data, tensorShape, offset, isTransposed) 458 | } 459 | 460 | /** 461 | * Create and fill tensor with values 462 | * 463 | * @param tensorShape shape 464 | * @param elem value 465 | * @param m ClassTag 466 | * @tparam T type 467 | * @return tensor 468 | */ 469 | def fill[@specialized(Double, Float) T](tensorShape: Array[Int]) 470 | (elem: => T) 471 | (implicit m: ClassTag[T], 472 | numOps: NumberLike[T]): DenseTensor[T] = { 473 | val data: Array[T] = Array.fill[T](tensorShape.product)(elem) 474 | DenseTensor(data, tensorShape) 475 | } 476 | 477 | /** 478 | * Apply a function to tensor x in place 479 | * 480 | * @param x source 481 | * @param func function 482 | * @tparam T type 483 | */ 484 | def applyFunction[@specialized(Double, Float) T](x: DenseTensor[T], func: T => T) 485 | (implicit m: ClassTag[T], numOps: NumberLike[T]): Unit = { 486 | var i = x.offset 487 | val sz = x.offset + x.size 488 | while (i < sz) { 489 | x.data(i) = func(x.data(i)) 490 | i += 1 491 | } 492 | } 493 | 494 | /** 495 | * Apply a function to tensor x and put the result in the y 496 | * 497 | * @param x source 498 | * @param y result 499 | * @param func function 500 | * @tparam T type 501 | */ 502 | def applyFunction[@specialized(Double, Float) T](x: DenseTensor[T], 503 | y: DenseTensor[T], 504 | func: T => T) 505 | (implicit m: ClassTag[T], 506 | numOps: NumberLike[T]): Unit = { 507 | require(x.size == y.size, "Tensor sizes must be equal") 508 | var i = 0 509 | val sz = y.size 510 | while (i < sz) { 511 | y.data(y.offset + i) = func(x.data(x.offset + i)) 512 | i += 1 513 | } 514 | } 515 | 516 | /** 517 | * Apply a function elementwise to tensors x1 and x2 and put the result in y 518 | * 519 | * @param x1 source1 520 | * @param x2 source2 521 | * @param y result 522 | * @param func function 523 | * @tparam T type 524 | */ 525 | def applyFunction[@specialized(Double, Float) T]( 526 | x1: DenseTensor[T], 527 | x2: DenseTensor[T], 528 | y: DenseTensor[T], 529 | func: (T, T) => T)(implicit m: ClassTag[T], numOps: NumberLike[T]): Unit = { 530 | require(x1.size == y.size && x2.size == y.size, "Tensor sizes must be equal") 531 | var i = 0 532 | val sz = y.size 533 | while (i < sz) { 534 | y.data(y.offset + i) = func(x1.data(x1.offset + i), x2.data(x2.offset + i)) 535 | i += 1 536 | } 537 | } 538 | 539 | /** 540 | * Double 2d tensor multiplication C <- alpha * A * B + beta * C 541 | * 542 | * @param alpha alpha 543 | * @param a A 544 | * @param b B 545 | * @param beta beta 546 | * @param c C 547 | */ 548 | def gemm( 549 | alpha: Double, 550 | a: DenseTensor[Double], 551 | b: DenseTensor[Double], 552 | beta: Double, 553 | c: DenseTensor[Double]): Unit = { 554 | // TODO: case with 3d and more 555 | require(a.shape.length == 2 && b.shape.length == 2 && c.shape.length == 2, 556 | "A, B, or C are not 2d tensors") 557 | // TODO: add code if matrices isTranspose!!! 
558 | require(a.shape(1) == b.shape(0), "A & B Dimension mismatch!") 559 | require(a.shape(0) == c.shape(0), "A & C Dimension mismatch!") 560 | require(b.shape(1) == c.shape(1), "B & C Dimension mismatch!") 561 | NativeBLAS.dgemm(transposeString(a), transposeString(b), c.shape(0), c.shape(1), a.shape(1), 562 | // TODO: check majorStride 563 | alpha, a.data, a.offset, a.majorStride, 564 | b.data, b.offset, b.majorStride, 565 | beta, c.data, c.offset, c.shape(0)) 566 | } 567 | 568 | /** 569 | * Double 2d tensor multiplication C <- alpha * A * B + beta * C 570 | * 571 | * @param alpha alpha 572 | * @param a A 573 | * @param b B 574 | * @param beta beta 575 | * @param c C 576 | */ 577 | def gemm( 578 | alpha: Float, 579 | a: DenseTensor[Float], 580 | b: DenseTensor[Float], 581 | beta: Float, 582 | c: DenseTensor[Float]): Unit = { 583 | // TODO: case with 3d and more 584 | require(a.shape.length == 2 && b.shape.length == 2 && c.shape.length == 2, 585 | "A, B, or C are not 2d tensors") 586 | // TODO: add code if matrices isTranspose!!! 587 | require(a.shape(1) == b.shape(0), "A & B Dimension mismatch!") 588 | require(a.shape(0) == c.shape(0), "A & C Dimension mismatch!") 589 | require(b.shape(1) == c.shape(1), "B & C Dimension mismatch!") 590 | NativeBLAS.sgemm(transposeString(a), transposeString(b), c.shape(0), c.shape(1), a.shape(1), 591 | // TODO: check majorStride 592 | alpha, a.data, a.offset, a.majorStride, 593 | b.data, b.offset, b.majorStride, 594 | beta, c.data, c.offset, c.shape(0)) 595 | } 596 | 597 | private def transposeString[T](a: DenseTensor[T]): String = if (a.transposed) "T" else "N" 598 | 599 | /** 600 | * GEMV: y := alpha * A * x + beta * y 601 | * 602 | * @param alpha alpha 603 | * @param a A 604 | * @param x x 605 | * @param beta beta 606 | * @param y y 607 | */ 608 | def gemv( 609 | alpha: Double, 610 | a: DenseTensor[Double], 611 | x: DenseTensor[Double], 612 | beta: Double, 613 | y: DenseTensor[Double]): Unit = { 614 | require(a.shape.length == 2 && x.shape.length == 1 && y.shape.length == 1, 615 | "A must be 2d and X, Y - 1d tensors") 616 | require(a.shape(1) == x.shape(0), "A & X Dimension mismatch!") 617 | require(a.shape(0) == y.shape(0), "A & Y Dimension mismatch!") 618 | NativeBLAS.dgemv(transposeString(a), a.shape(0), a.shape(1), 619 | alpha, a.data, a.offset, a.shape(0), 620 | x.data, x.offset, 1, 621 | beta, y.data, y.offset, 1) 622 | } 623 | 624 | /** 625 | * GEMV: y := alpha * A * x + beta * y 626 | * 627 | * @param alpha alpha 628 | * @param a A 629 | * @param x x 630 | * @param beta beta 631 | * @param y y 632 | */ 633 | def gemv( 634 | alpha: Float, 635 | a: DenseTensor[Float], 636 | x: DenseTensor[Float], 637 | beta: Float, 638 | y: DenseTensor[Float]): Unit = { 639 | require(a.shape.length == 2 && x.shape.length == 1 && y.shape.length == 1, 640 | "A must be 2d and X, Y - 1d tensors") 641 | require(a.shape(1) == x.shape(0), "A & X Dimension mismatch!") 642 | require(a.shape(0) == y.shape(0), "A & Y Dimension mismatch!") 643 | NativeBLAS.sgemv(transposeString(a), a.shape(0), a.shape(1), 644 | alpha, a.data, a.offset, a.shape(0), 645 | x.data, x.offset, 1, 646 | beta, y.data, y.offset, 1) 647 | } 648 | 649 | /** 650 | * y := alpha * x + y 651 | * 652 | * @param alpha alpha 653 | * @param x vector x 654 | * @param y vector y 655 | */ 656 | def axpy(alpha: Double, x: DenseTensor[Double], y: DenseTensor[Double]): Unit = { 657 | require(x.size == y.size, "x and y sizes equals") 658 | val n = x.size 659 | NativeBLAS.daxpy(n, alpha, x.data, 1, y.data, 1) 660 | } 
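  // A minimal usage sketch of the wrappers in this object (it mirrors the dgemm and axpy
  // tests in DenseTensorSuite, assuming the column-major layout used throughout this file):
  //   val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) // 2x3 matrix
  //   val b = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(3, 2)) // 3x2 matrix
  //   val c = DenseTensor[Double](Array(2, 2))                                  // 2x2 result
  //   DenseTensor.gemm(1.0, a, b, 0.0, c) // c := a * b, so c.data becomes (22, 28, 49, 64)
  //   val x = DenseTensor[Double](Array[Double](0.5, 1.0, 1.5, 2.0), Array(4))
  //   val y = DenseTensor[Double](Array[Double](1, 2, 3, 4), Array(4))
  //   DenseTensor.axpy(2.0, x, y)         // y := 2 * x + y, so y.data becomes (2, 4, 6, 8)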
661 | 662 | /** 663 | * y := alpha * x + y 664 | * 665 | * @param alpha alpha 666 | * @param x vector x 667 | * @param y vector y 668 | */ 669 | def axpy(alpha: Float, x: DenseTensor[Float], y: DenseTensor[Float]): Unit = { 670 | require(x.size == y.size, "x and y sizes equals") 671 | val n = x.size 672 | NativeBLAS.saxpy(n, alpha, x.data, 1, y.data, 1) 673 | } 674 | 675 | /** 676 | * x := alpha * x 677 | * @param alpha alpha 678 | * @param x vector x 679 | */ 680 | def scal(alpha: Double, x: DenseTensor[Double]): Unit = { 681 | val n = x.size 682 | NativeBLAS.dscal(n, alpha, x.data, x.offset, 1) 683 | } 684 | 685 | /** 686 | * x := alpha * x 687 | * @param alpha alpha 688 | * @param x x 689 | */ 690 | def scal(alpha: Float, x: DenseTensor[Float]): Unit = { 691 | val n = x.size 692 | NativeBLAS.sscal(n, alpha, x.data, x.offset, 1) 693 | } 694 | 695 | protected def elementwise( 696 | a: DenseTensor[Double], 697 | b: DenseTensor[Double], 698 | op: (Double, Double) => Double): Unit = { 699 | require(a.size == b.size, "Tensors of different size") 700 | var i = 0 701 | val sz = a.size 702 | while (i < sz) { 703 | a.data(i) = op(a.data(i), b.data(i)) 704 | i += 1 705 | } 706 | } 707 | 708 | /** 709 | * Elementwise product a := a * b 710 | * 711 | * @param a vector a 712 | * @param b vector b 713 | */ 714 | def elementwiseProduct(a: DenseTensor[Double], b: DenseTensor[Double]): Unit = { 715 | elementwise(a, b, (x, y) => x * y) 716 | } 717 | } 718 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/ml/scaladl/ANNSpeedSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import org.apache.spark.ml.classification.{MultilayerPerceptronClassifier => SMLP} 21 | import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator 22 | import org.apache.spark.ml.scaladl.{MultilayerPerceptronClassifier => TMLP} 23 | import org.scalatest.FunSuite 24 | 25 | import scaladl.util.SparkTestContext 26 | 27 | class ANNSpeedSuite extends FunSuite with SparkTestContext { 28 | 29 | // test ("speed test") { 30 | // val mnistPath = System.getenv("MNIST_HOME") 31 | // println(mnistPath + "/mnist.scale") 32 | // val dataFrame = sqlContext. 
33 | // createDataFrame(MLUtils.loadLibSVMFile(sc, mnistPath + "/mnist.scale", 784)).persist() 34 | // dataFrame.count() 35 | // val mlp = new MultilayerPerceptronClassifier().setLayers(Array(784, 32, 10)) 36 | // .setTol(10e-9) 37 | // .setMaxIter(20) 38 | // .setSeed(1234L) 39 | // val t = System.nanoTime() 40 | // val model = mlp.fit(dataFrame) 41 | // val total = System.nanoTime() - t 42 | // println("Total time: " + total / 1e9 + " s. (should be ~42s. without native BLAS") 43 | // val test = sqlContext. 44 | // createDataFrame(MLUtils.loadLibSVMFile(sc, mnistPath + "/mnist.scale.t", 784)).persist() 45 | // test.count() 46 | // val result = model.transform(test) 47 | // val pl = result.select("prediction", "label") 48 | // val ev = new MulticlassClassificationEvaluator().setMetricName("precision") 49 | // println("Accuracy: " + ev.evaluate(pl)) 50 | // } 51 | 52 | test ("speed test with tensor (native BLAS and MNIST_HOME needs to be configured") { 53 | val mnistPath = System.getenv("MNIST_HOME") 54 | val dataFrame = spark 55 | .read 56 | .format("libsvm") 57 | .option("numFeatures", 784) 58 | .load(mnistPath + "/mnist.scale") 59 | .persist() 60 | dataFrame.count() 61 | val layers = Array(784, 100, 10) 62 | val maxIter = 20 63 | val tol = 1e-9 64 | val warmUp = new SMLP().setLayers(layers) 65 | .setTol(10e-9) 66 | .setMaxIter(1) 67 | .setSeed(1234L) 68 | .fit(dataFrame) 69 | val weights = warmUp.weights 70 | 71 | val mlp = new SMLP().setLayers(layers) 72 | .setTol(tol) 73 | .setMaxIter(maxIter) 74 | .setInitialWeights(weights.copy) 75 | val t = System.nanoTime() 76 | val model = mlp.fit(dataFrame) 77 | val total = System.nanoTime() - t 78 | val tensorMLP = new TMLP().setLayers(layers) 79 | .setTol(tol) 80 | .setMaxIter(maxIter) 81 | .setInitialWeights(weights.copy) 82 | val tTensor = System.nanoTime() 83 | val tModel = tensorMLP.fit(dataFrame) 84 | val totalTensor = System.nanoTime() - tTensor 85 | // time is 49.9 s on my machine 86 | assert(math.abs(totalTensor - total) / 1e9 < 0.15 * total /1e9, 87 | "Training time of tensor version should differ no more than 15% s. from original version") 88 | val test = spark 89 | .read 90 | .format("libsvm") 91 | .option("numFeatures", 784) 92 | .load(mnistPath + "/mnist.scale.t") 93 | .persist() 94 | test.count() 95 | val result = model.transform(test) 96 | val pl = result.select("prediction", "label") 97 | val ev = new MulticlassClassificationEvaluator().setMetricName("accuracy") 98 | val tResult = tModel.transform(test) 99 | val tpl = tResult.select("prediction", "label") 100 | val tev = new MulticlassClassificationEvaluator().setMetricName("accuracy") 101 | assert(tev.evaluate(tpl) == ev.evaluate(pl), "Accuracies must be equal") 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/ml/scaladl/MultilayerPerceptronClassifierSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import org.apache.spark.ml.linalg.Vectors 21 | import org.apache.spark.sql.Row 22 | import org.scalatest.FunSuite 23 | 24 | import scaladl.util.SparkTestContext 25 | 26 | class MultilayerPerceptronClassifierSuite extends FunSuite with SparkTestContext { 27 | 28 | test("XOR function learning as binary classification problem with two outputs.") { 29 | val dataFrame = spark.createDataFrame(Seq( 30 | (Vectors.dense(0.0, 0.0), 0.0), 31 | (Vectors.dense(0.0, 1.0), 1.0), 32 | (Vectors.dense(1.0, 0.0), 1.0), 33 | (Vectors.dense(1.0, 1.0), 0.0)) 34 | ).toDF("features", "label") 35 | val layers = Array[Int](2, 5, 2) 36 | val trainer = new MultilayerPerceptronClassifier() 37 | .setLayers(layers) 38 | .setBlockSize(1) 39 | .setSeed(123L) 40 | .setMaxIter(100) 41 | val model = trainer.fit(dataFrame) 42 | val result = model.transform(dataFrame) 43 | val predictionAndLabels = result.select("prediction", "label").collect() 44 | predictionAndLabels.foreach { case Row(p: Double, l: Double) => 45 | assert(p == l) 46 | } 47 | } 48 | 49 | test("Test setWeights by training restart") { 50 | val dataFrame = spark.createDataFrame(Seq( 51 | (Vectors.dense(0.0, 0.0), 0.0), 52 | (Vectors.dense(0.0, 1.0), 1.0), 53 | (Vectors.dense(1.0, 0.0), 1.0), 54 | (Vectors.dense(1.0, 1.0), 0.0)) 55 | ).toDF("features", "label") 56 | val layers = Array[Int](2, 5, 2) 57 | val trainer = new MultilayerPerceptronClassifier() 58 | .setLayers(layers) 59 | .setBlockSize(1) 60 | .setSeed(123456L) 61 | .setMaxIter(1) 62 | .setTol(1e-6) 63 | val initialWeights = trainer.fit(dataFrame).weights 64 | trainer.setInitialWeights(initialWeights.copy) 65 | val weights1 = trainer.fit(dataFrame).weights 66 | trainer.setInitialWeights(initialWeights.copy) 67 | val weights2 = trainer.fit(dataFrame).weights 68 | weights1.toArray.zip(weights2.toArray).foreach { x => 69 | assert(math.abs(x._1 - x._2) <= 10e-5, 70 | "Training should produce the same weights given equal initial weights and number of steps") 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/ml/scaladl/StackedAutoencoderSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import org.apache.spark.ml.linalg.{Vector, Vectors} 21 | import org.apache.spark.sql.Row 22 | import org.scalatest.FunSuite 23 | 24 | import scaladl.util.SparkTestContext 25 | 26 | class StackedAutoencoderSuite extends FunSuite with SparkTestContext { 27 | 28 | // using data similar to https://inst.eecs.berkeley.edu/~cs182/sp08/assignments/a3-tlearn.html 29 | val binaryData = Seq( 30 | Vectors.dense(Array(1.0, 0.0, 0.0, 0.0)), 31 | Vectors.dense(Array(0.0, 1.0, 0.0, 0.0)), 32 | Vectors.dense(Array(0.0, 0.0, 1.0, 0.0)), 33 | Vectors.dense(Array(0.0, 0.0, 0.0, 1.0))) 34 | 35 | val real01Data = Seq( 36 | Vectors.dense(Array(0.5, 0.1, 0.1, 0.1)), 37 | Vectors.dense(Array(0.1, 0.6, 0.5, 0.5)), 38 | Vectors.dense(Array(0.5, 0.5, 0.5, 0.5)), 39 | Vectors.dense(Array(0.9, 0.9, 0.9, 0.9))) 40 | 41 | val realData = Seq( 42 | Vectors.dense(Array(10.0, 0.0, 0.0, 0.0)), 43 | Vectors.dense(Array(0.0, 1.0, 0.0, 0.0)), 44 | Vectors.dense(Array(0.0, 0.0, 10.0, 0.0)), 45 | Vectors.dense(Array(0.0, 0.0, 0.0, 10.0))) 46 | 47 | test("Autoencoder reconstructs the original data by encoding and decoding") { 48 | val dataSets = Seq(binaryData, real01Data, realData) 49 | val dataTypes = Seq(true, true, false) 50 | val dataSetAndTypes = dataSets.zip(dataTypes) 51 | for ((data, is01) <- dataSetAndTypes) { 52 | val rdd = sc.parallelize(data, 1).map(x => Tuple1(x)) 53 | val df = spark.createDataFrame(rdd).toDF("input") 54 | val stackedAutoencoder = new StackedAutoencoder() 55 | .setLayers(Array(4, 3, 3)) 56 | .setBlockSize(1) 57 | .setMaxIter(100) 58 | .setSeed(123456789L) 59 | .setTol(1e-6) 60 | .setInputCol("input") 61 | .setOutputCol("output") 62 | .setDataIn01Interval(is01) 63 | .setBuildDecoder(true) 64 | // TODO: find a way to inherit the input and output parameter value from estimator 65 | val saModel = stackedAutoencoder.fit(df) 66 | saModel.setInputCol("input").setOutputCol("encoded") 67 | // encoding 68 | val encodedData = saModel.transform(df) 69 | // decoding 70 | saModel.setInputCol("encoded").setOutputCol("decoded") 71 | val decodedData = saModel.decode(encodedData) 72 | // epsilon == 1/100 of the maximum value 73 | val eps = if (is01) 1.0 / 100 else 10.0 / 100 74 | decodedData.collect.foreach { case Row(input: Vector, _: Vector, decoded: Vector) => 75 | input.toArray.zip(decoded.toArray).foreach { x => 76 | assert(math.abs(x._1 - x._2) <= eps, 77 | "Decoder should produce vectors close to the input") 78 | } 79 | } 80 | } 81 | } 82 | 83 | test("Autoencoder use for pre-training") { 84 | val seed = 123456789L 85 | val numIter = 20 86 | val dataFrame = spark.createDataFrame(Seq( 87 | (Vectors.dense(0.0, 0.0), 0.0), 88 | (Vectors.dense(0.0, 1.0), 1.0), 89 | (Vectors.dense(1.0, 0.0), 1.0), 90 | (Vectors.dense(1.0, 1.0), 0.0)) 91 | ).toDF("features", "label") 92 | val layers = Array[Int](2, 7, 6, 5, 4, 3, 2) 93 | val trainer = new MultilayerPerceptronClassifier() 94 | .setLayers(layers) 95 | .setBlockSize(1) 96 | .setSeed(seed) 97 | .setMaxIter(1) 98 | .setTol(1e-6) 99 | val initialWeights = trainer.fit(dataFrame).weights 100 | trainer 101 | .setInitialWeights(initialWeights.copy) 102 | .setMaxIter(numIter) 103 | val badModel = trainer.fit(dataFrame) 104 | val badResult = badModel.transform(dataFrame) 105 | val badPredictionAndLabels = badResult.select("prediction", "label").collect() 106 | // solution converged to a bad optimum 107 | 
assert(!badPredictionAndLabels.forall { case Row(p: Double, l: Double) => 108 | p == l 109 | }, "Model should not predict as expected") 110 | 111 | // pre-train all layers except last as stacked autoencoder 112 | val encoderLayers = layers.init 113 | val autoEncoder = new StackedAutoencoder("stackedAutoencoder") 114 | .setBlockSize(1) 115 | .setBuildDecoder(false) 116 | .setDataIn01Interval(true) 117 | .setInputCol("features") 118 | .setLayers(encoderLayers) 119 | .setMaxIter(numIter) 120 | .setSeed(seed) 121 | .setTol(1e-6) 122 | val autoEncoderModel = autoEncoder.fit(dataFrame) 123 | val autoEncoderWeights = autoEncoderModel.encoderWeights 124 | // initialize weights for the classifier and copy pre-trained weights 125 | System.arraycopy( 126 | autoEncoderWeights.toArray, 0, initialWeights.toArray, 0, autoEncoderWeights.toArray.length) 127 | val preTrainer = new MultilayerPerceptronClassifier() 128 | .setLayers(layers) 129 | .setBlockSize(1) 130 | .setInitialWeights(initialWeights) 131 | .setMaxIter(numIter) 132 | .setTol(1e-6) 133 | val preModel = preTrainer.fit(dataFrame) 134 | val preResult = preModel.transform(dataFrame) 135 | val predictionAndLabels = preResult.select("prediction", "label").collect() 136 | predictionAndLabels.foreach { case Row(p: Double, l: Double) => 137 | assert(p == l, "Training after pre-training should succeed") 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/layers/GradientSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package scaladl.layers 19 | 20 | import org.apache.spark.ml.linalg.Vectors 21 | import org.scalatest.FunSuite 22 | 23 | import scaladl.layers.AnnTypes._ 24 | import scaladl.tensor.DenseTensor 25 | 26 | class GradientSuite extends FunSuite { 27 | 28 | test("Gradient computation against numerical differentiation") { 29 | val x = DenseTensor[Double](Array(1.0, 1.0, 1.0), Array(3, 1)) 30 | val input = new Tensor(Array(1.0, 1.0, 1.0), Array(3, 1)) 31 | // output must contain zeros and one 1 for SoftMax 32 | val target = new Tensor(Array(0.0, 1.0), Array(2, 1)) 33 | val topology = FeedForwardTopology.multiLayerPerceptron(Array(3, 4, 2), softmaxOnTop = false) 34 | val layersWithErrors = Seq( 35 | new SigmoidLayerWithSquaredError(), 36 | new SoftmaxLayerWithCrossEntropyLoss(), 37 | new SigmoidLayerWithCrossEntropyLoss(), 38 | new EmptyLayerWithSquaredError() 39 | ) 40 | // check all layers that provide loss computation 41 | // 1) compute loss and gradient given the model and initial weights 42 | // 2) modify weights with small number epsilon (per dimension i) 43 | // 3) compute new loss 44 | // 4) ((newLoss - loss) / epsilon) should be close to the i-th component of the gradient 45 | for (layerWithError <- layersWithErrors) { 46 | topology.layers(topology.layers.length - 1) = layerWithError 47 | val model = topology.model(seed = 12L) 48 | val weights = model.weights.toArray 49 | val numWeights = weights.size 50 | val gradient = new Tensor(Array(numWeights)) 51 | val loss = model.computeGradient(input, target, gradient, 1) 52 | val eps = 1e-4 53 | var i = 0 54 | val tol = 1e-4 55 | while (i < numWeights) { 56 | val originalValue = weights(i) 57 | weights(i) += eps 58 | val newModel = topology.model(Vectors.dense(weights)) 59 | val newLoss = computeLoss(input, target, newModel) 60 | val derivativeEstimate = (newLoss - loss) / eps 61 | assert(math.abs(gradient.value(i) - derivativeEstimate) < tol, 62 | "Layer failed gradient check: " + layerWithError.getClass) 63 | weights(i) = originalValue 64 | i += 1 65 | } 66 | } 67 | } 68 | 69 | private def computeLoss(input: Tensor, target: Tensor, model: TopologyModel): Double = { 70 | val outputs = model.forward(input) 71 | model.layerModels.last match { 72 | case layerWithLoss: LossFunction => 73 | layerWithLoss.loss(outputs.last, target, new Tensor(target.shape)) 74 | case _ => 75 | throw new UnsupportedOperationException("Top layer is required to have loss." + 76 | " Failed layer:" + model.layerModels.last.getClass) 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/layers/LayerSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.layers 19 | 20 | import org.apache.spark.ml.linalg.Vectors 21 | import org.scalatest.FunSuite 22 | 23 | import scaladl.util.SparkTestContext 24 | 25 | class LayerSuite extends FunSuite with SparkTestContext { 26 | 27 | // TODO: test for weights comparison with Weka MLP 28 | test("ANN with Sigmoid learns XOR function with LBFGS optimizer") { 29 | val inputs = Array( 30 | Array(0.0, 0.0), 31 | Array(0.0, 1.0), 32 | Array(1.0, 0.0), 33 | Array(1.0, 1.0) 34 | ) 35 | val outputs = Array(0.0, 1.0, 1.0, 0.0) 36 | val data = inputs.zip(outputs).map { case (features, label) => 37 | (Vectors.dense(features), Vectors.dense(label)) 38 | } 39 | val rddData = sc.parallelize(data, 1) 40 | val hiddenLayersTopology = Array(5) 41 | val dataSample = rddData.first() 42 | val layerSizes = dataSample._1.size +: hiddenLayersTopology :+ dataSample._2.size 43 | val topology = FeedForwardTopology.multiLayerPerceptron(layerSizes, false) 44 | val initialWeights = FeedForwardModel(topology, 23124).weights 45 | val trainer = new FeedForwardTrainer(topology, 2, 1) 46 | trainer.setWeights(initialWeights) 47 | trainer.LBFGSOptimizer.setNumIterations(20) 48 | val model = trainer.train(rddData) 49 | val predictionAndLabels = rddData.map { case (input, label) => 50 | (model.predict(input)(0), label(0)) 51 | }.collect() 52 | predictionAndLabels.foreach { case (p, l) => 53 | assert(math.round(p) === l) 54 | } 55 | } 56 | 57 | test("ANN with SoftMax learns XOR function with 2-bit output and batch GD optimizer") { 58 | val inputs = Array( 59 | Array(0.0, 0.0), 60 | Array(0.0, 1.0), 61 | Array(1.0, 0.0), 62 | Array(1.0, 1.0) 63 | ) 64 | val outputs = Array( 65 | Array(1.0, 0.0), 66 | Array(0.0, 1.0), 67 | Array(0.0, 1.0), 68 | Array(1.0, 0.0) 69 | ) 70 | val data = inputs.zip(outputs).map { case (features, label) => 71 | (Vectors.dense(features), Vectors.dense(label)) 72 | } 73 | val rddData = sc.parallelize(data, 1) 74 | val hiddenLayersTopology = Array(5) 75 | val dataSample = rddData.first() 76 | val layerSizes = dataSample._1.size +: hiddenLayersTopology :+ dataSample._2.size 77 | val topology = FeedForwardTopology.multiLayerPerceptron(layerSizes, false) 78 | val initialWeights = FeedForwardModel(topology, 23124).weights 79 | val trainer = new FeedForwardTrainer(topology, 2, 2) 80 | // TODO: add a test for SGD 81 | trainer.LBFGSOptimizer.setConvergenceTol(1e-4).setNumIterations(20) 82 | trainer.setWeights(initialWeights).setStackSize(1) 83 | val model = trainer.train(rddData) 84 | val predictionAndLabels = rddData.map { case (input, label) => 85 | (model.predict(input), label) 86 | }.collect() 87 | predictionAndLabels.foreach { case (p, l) => 88 | p.toArray.zip(l.toArray).foreach(pair => assert(math.abs(pair._1 - pair._2) < 0.5)) 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/tensor/DenseTensorSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.tensor 19 | 20 | import org.scalatest.FunSuite 21 | 22 | class DenseTensorSuite extends FunSuite { 23 | 24 | test ("value") { 25 | val data = Array[Double](1, 2, 3, 4, 5, 6, 7, 8) 26 | val shape2d = Array(4, 2) 27 | val tensor2d = DenseTensor[Double](data, shape2d) 28 | assert(tensor2d.value(Array(2, 1)) == 7.0, "(2, 1) must be 7.0") 29 | val shape3d = Array(2, 2, 2) 30 | val tensor3d = DenseTensor[Double](data, shape3d) 31 | assert(tensor3d.value(Array(1, 1, 1)) == 8.0, "(1, 1, 1) must be 8.0") 32 | } 33 | 34 | test ("slice") { 35 | val data8 = Array[Double](0, 1, 2, 3, 4, 5, 6, 7) 36 | val shape2d = Array(4, 2) 37 | val tensor2d = DenseTensor[Double](data8, shape2d) 38 | val slice2d = tensor2d.slice(1, 2) 39 | assert(slice2d.copyData().deep == data8.slice(4, 8).deep, 40 | "The resulting slice must be (4, 5, 6, 7) ") 41 | val data12 = Array[Double](0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) 42 | val shape3d = Array(2, 2, 3) 43 | val tensor3d = DenseTensor[Double](data12, shape3d) 44 | val slice3d = tensor3d.slice(1, 2) 45 | assert(slice3d.copyData().deep == data12.slice(4, 8).deep, 46 | "The resulting slice must be (4, 5, 6, 7) ") 47 | val shape5d = Array(2, 1, 2, 1, 3) 48 | val tensor5d = DenseTensor[Double](data12, shape5d) 49 | val slice5dto2d = tensor5d.slice(1) 50 | assert(slice5dto2d.copyData().deep == data12.slice(4, 8).deep, 51 | "The resulting slice must be (4, 5, 6, 7) ") 52 | } 53 | 54 | test ("apply function") { 55 | val shape2d = Array(4, 2) 56 | val a = DenseTensor[Double](Array[Double](0, 1, 2, 3, 4, 5, 6, 7), shape2d) 57 | DenseTensor.applyFunction(a, (t: Double) => t * t) 58 | assert(a.copyData().deep == Array[Double](0, 1, 4, 9, 16, 25, 36, 49).deep, 59 | "The result must be (0, 1, 4, 9, 16, 25, 36, 49)") 60 | val x = DenseTensor[Double](Array[Double](0, 1, 2, 3, 4, 5, 6, 7), shape2d) 61 | val y = DenseTensor[Double](shape2d) 62 | def func: (Double) => Double = v => v + 1 63 | DenseTensor.applyFunction[Double](x, y, func) 64 | assert(y.copyData().deep == Array[Double](1, 2, 3, 4, 5, 6, 7, 8).deep, 65 | "The result must be (1, 2, 3, 4, 5, 6, 7, 8)") 66 | val x2 = DenseTensor[Double](Array[Double](0, 1, 2, 3, 4, 5, 6, 7), shape2d) 67 | val x1 = x 68 | val z = DenseTensor[Double](shape2d) 69 | def func2: (Double, Double) => Double = (v1, v2) => v1 + v2 70 | DenseTensor.applyFunction[Double](x1, x2, z, func2) 71 | assert(z.copyData().deep == Array[Double](0, 2, 4, 6, 8, 10, 12, 14).deep, 72 | "The result must be (0, 2, 4, 6, 8, 10, 12, 14)") 73 | } 74 | 75 | test ("fillWith") { 76 | val recipient = DenseTensor[Double](Array(4, 2)) 77 | val donor = DenseTensor[Double](Array[Double](0, 1, 2, 3), Array(4, 1)) 78 | recipient.fillWith(donor) 79 | assert(recipient.copyData().deep == Array[Double](0, 1, 2, 3, 0, 1, 2, 3).deep, 80 | "The result must be (0, 1, 2, 3, 0, 1, 2, 3)") 81 | } 82 | 83 | test ("fill") { 84 | val onesTensor = DenseTensor.fill[Double](Array(1, 2, 1))(1.0) 85 | assert(onesTensor.copyData().forall(x => x == 1.0), "All elements are 1.0") 86 | } 87 | 88 | test ("plus double") { 89 | val x = 
DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 90 | val y = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 91 | val z = x + y 92 | val trueZ = DenseTensor[Double](Array[Double](2, 4, 6, 8, 10, 12), Array(2, 3)) 93 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 94 | } 95 | 96 | test ("plus float") { 97 | val x = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 98 | val y = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 99 | val z = x + y 100 | val trueZ = DenseTensor[Float](Array[Float](2, 4, 6, 8, 10, 12), Array(2, 3)) 101 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 102 | } 103 | 104 | test ("minus double") { 105 | val x = DenseTensor[Double](Array[Double](2, 4, 6, 8, 10, 12), Array(2, 3)) 106 | val y = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 107 | val z = x - y 108 | val trueZ = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 109 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 110 | } 111 | 112 | test ("minus float") { 113 | val x = DenseTensor[Float](Array[Float](2, 4, 6, 8, 10, 12), Array(2, 3)) 114 | val y = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 115 | val z = x - y 116 | val trueZ = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 117 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 118 | } 119 | 120 | test ("elementwise product double") { 121 | val x = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 122 | val y = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 123 | val z = x :* y 124 | val trueZ = DenseTensor[Double](Array[Double](1, 4, 9, 16, 25, 36), Array(2, 3)) 125 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 126 | } 127 | 128 | test ("elementwise product float") { 129 | val x = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 130 | val y = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 131 | val z = x :* y 132 | val trueZ = DenseTensor[Float](Array[Float](1, 4, 9, 16, 25, 36), Array(2, 3)) 133 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 134 | } 135 | 136 | test ("sum double") { 137 | val x = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 138 | assert(x.sum == 21, "Sum has to be 21") 139 | } 140 | 141 | test ("sum float") { 142 | val x = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 143 | assert(x.sum == 21, "Sum has to be 21") 144 | } 145 | 146 | test ("axpy double precision") { 147 | val alpha = 2 148 | val x = DenseTensor[Double](Array[Double](0.5, 1, 1.5, 2, 2.5, 3), Array(6)) 149 | val y = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(6)) 150 | DenseTensor.axpy(alpha, x, y) 151 | assert(y.copyData().deep == Array[Double](2, 4, 6, 8, 10, 12).deep) 152 | } 153 | 154 | test ("axpy single precision") { 155 | val alpha = 2 156 | val x = DenseTensor[Float](Array[Float](0.5f, 1f, 1.5f, 2f, 2.5f, 3f), Array(6)) 157 | val y = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(6)) 158 | DenseTensor.axpy(alpha, x, y) 159 | assert(y.copyData().deep == Array[Float](2, 4, 6, 8, 10, 12).deep) 160 | } 161 | 162 | test ("dgemm double precision") { 163 | val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 164 | val b = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(3, 2)) 165 | val c = DenseTensor[Double](Array(2, 2)) 166 | DenseTensor.gemm(1.0, a, b, 0.0, c) 167 | 
assert(c.copyData().deep == Array[Double](22, 28, 49, 64).deep) 168 | DenseTensor.gemm(0.5, a, b, 0.5, c) 169 | assert(c.copyData().deep == Array[Double](22, 28, 49, 64).deep) 170 | } 171 | 172 | test ("dgemm double precision transpose") { 173 | val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(3, 2)) 174 | val b = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(3, 2)) 175 | val c = DenseTensor[Double](Array(2, 2)) 176 | DenseTensor.gemm(1.0, a.transpose, b, 0.0, c) 177 | assert(c.copyData().deep == Array[Double](14, 32, 32, 77).deep) 178 | } 179 | 180 | test ("dgemm single precision") { 181 | val a = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 182 | val b = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(3, 2)) 183 | val c = DenseTensor[Float](Array(2, 2)) 184 | DenseTensor.gemm(1.0f, a, b, 0.0f, c) 185 | assert(c.copyData().deep == Array[Float](22, 28, 49, 64).deep) 186 | DenseTensor.gemm(0.5f, a, b, 0.5f, c) 187 | assert(c.copyData().deep == Array[Float](22, 28, 49, 64).deep) 188 | } 189 | 190 | test ("dgemm single precision transpose") { 191 | val a = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(3, 2)) 192 | val b = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(3, 2)) 193 | val c = DenseTensor[Float](Array(2, 2)) 194 | DenseTensor.gemm(1.0f, a.transpose, b, 0.0f, c) 195 | assert(c.copyData().deep == Array[Double](14, 32, 32, 77).deep) 196 | } 197 | 198 | test("gemv double precision") { 199 | val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 200 | val x = DenseTensor[Double](Array[Double](1, 2, 3), Array(3)) 201 | val y = DenseTensor[Double](Array[Double](2, 2), Array(2)) 202 | DenseTensor.gemv(1.0, a, x, 0.5, y) 203 | assert(y.copyData().deep == Array[Double](23, 29).deep) 204 | } 205 | 206 | test("gemv single precision") { 207 | val a = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 208 | val x = DenseTensor[Float](Array[Float](1, 2, 3), Array(3)) 209 | val y = DenseTensor[Float](Array[Float](2, 2), Array(2)) 210 | DenseTensor.gemv(1.0f, a, x, 0.5f, y) 211 | assert(y.copyData().deep == Array[Float](23, 29).deep) 212 | } 213 | 214 | test ("elementwise product") { 215 | val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 216 | val b = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 217 | DenseTensor.elementwiseProduct(a, b) 218 | assert(a.copyData().deep == Array[Double](1, 4, 9, 16, 25, 36).deep) 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/tensor/NumericBoxingTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.tensor 19 | 20 | class NumericBoxingTest[@specialized(Double, Float) T : Numeric] { 21 | lazy val numOps = implicitly[Numeric[T]] 22 | def plus(x: T, y: T): T = numOps.plus(x, y) 23 | } 24 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/tensor/TypedClassTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.tensor 19 | 20 | import scaladl.tensor.Math.NumberLike 21 | 22 | object Math { 23 | trait NumberLike[@specialized (Double, Int) T] { 24 | def plus(x: T, y: T): T 25 | } 26 | object NumberLike { 27 | implicit object NumberLikeDouble extends NumberLike[Double] { 28 | def plus(x: Double, y: Double): Double = x + y 29 | } 30 | implicit object NumberLikeInt extends NumberLike[Int] { 31 | def plus(x: Int, y: Int): Int = x + y 32 | } 33 | } 34 | } 35 | object Statistics { 36 | import Math.NumberLike 37 | def plus[@specialized (Double, Int) T](x: T, y: T)(implicit ev: NumberLike[T]): T = 38 | ev.plus(x, y) 39 | def plusDouble(x: Double, y: Double): Double = x + y 40 | } 41 | 42 | class My[@specialized (Double, Int) T](implicit ev: NumberLike[T]) { 43 | def plus(x: T, y: T): T = ev.plus(x, y) 44 | } 45 | 46 | object TypedClassTest { 47 | def main(args: Array[String]): Unit = { 48 | // Statistics.plus(2.0, 2.0) 49 | // Statistics.plusDouble(2.0, 2.0) 50 | val m = new My[Double]() 51 | m.plus(2.0, 2.0) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/util/SparkTestContext.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package scaladl.util 19 | 20 | import org.apache.log4j.{Level, Logger} 21 | import org.apache.spark.sql.SparkSession 22 | import org.apache.spark.SparkContext 23 | import org.scalatest.{BeforeAndAfterAll, Suite} 24 | 25 | trait SparkTestContext extends BeforeAndAfterAll { self: Suite => 26 | @transient var spark: SparkSession = _ 27 | @transient var sc: SparkContext = _ 28 | @transient var checkpointDir: String = _ 29 | 30 | override def beforeAll() { 31 | super.beforeAll() 32 | spark = SparkSession.builder 33 | .master("local[2]") 34 | .appName("MLlibUnitTest") 35 | .config("spark.sql.warehouse.dir", "warehouse-temp") 36 | .getOrCreate() 37 | sc = spark.sparkContext 38 | Logger.getLogger("org").setLevel(Level.WARN) 39 | } 40 | 41 | override def afterAll() { 42 | try { 43 | SparkSession.clearActiveSession() 44 | if (spark != null) { 45 | spark.stop() 46 | } 47 | spark = null 48 | } finally { 49 | super.afterAll() 50 | } 51 | } 52 | } 53 | 54 | --------------------------------------------------------------------------------