├── .gitignore
├── LICENSE
├── README.md
├── build.sbt
├── pom.xml
├── project
│   └── plugins.sbt
├── scalastyle-config.xml
└── src
    ├── main
    │   └── scala
    │       ├── org
    │       │   └── apache
    │       │       └── spark
    │       │           └── ml
    │       │               └── scaladl
    │       │                   ├── MultilayerPerceptronClassifier.scala
    │       │                   └── StackedAutoencoder.scala
    │       └── scaladl
    │           ├── examples
    │           │   ├── MnistClassification.scala
    │           │   └── MnistEncoding.scala
    │           ├── layers
    │           │   ├── Layer.scala
    │           │   └── LossFunction.scala
    │           ├── optimization
    │           │   ├── Gradient.scala
    │           │   ├── GradientDescent.scala
    │           │   ├── LBFGS.scala
    │           │   ├── Optimizer.scala
    │           │   └── Updater.scala
    │           └── tensor
    │               └── DenseTensor.scala
    └── test
        └── scala
            ├── org
            │   └── apache
            │       └── spark
            │           └── ml
            │               └── scaladl
            │                   ├── ANNSpeedSuite.scala
            │                   ├── MultilayerPerceptronClassifierSuite.scala
            │                   └── StackedAutoencoderSuite.scala
            └── scaladl
                ├── layers
                │   ├── GradientSuite.scala
                │   └── LayerSuite.scala
                ├── tensor
                │   ├── DenseTensorSuite.scala
                │   ├── NumericBoxingTest.scala
                │   └── TypedClassTest.scala
                └── util
                    └── SparkTestContext.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | # use glob syntax.
2 | syntax: glob
3 | *.ser
4 | *.class
5 | *~
6 | *.bak
7 | #*.off
8 | *.old
9 |
10 | # eclipse conf file
11 | .settings
12 | .classpath
13 | .project
14 | .manager
15 | .scala_dependencies
16 |
17 | # idea
18 | .idea
19 | *.iml
20 |
21 | # building
22 | target
23 | build
24 | null
25 | tmp*
26 | temp*
27 | dist
28 | test-output
29 | build.log
30 |
31 | # other scm
32 | .svn
33 | .CVS
34 | .hg*
35 |
36 | # switch to regexp syntax.
37 | # syntax: regexp
38 | # ^\.pc/
39 |
40 | #SHITTY output not in target directory
41 | build.log
42 |
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # A Scalable Implementation of Deep Learning on Spark
2 | This library is based on the implementation of artificial neural networks in [Spark ML](https://spark.apache.org/docs/latest/ml-classification-regression.html#multilayer-perceptron-classifier). In addition to the multilayer perceptron, it contains new [Spark deep learning features](https://issues.apache.org/jira/browse/SPARK-5575) that have not yet been merged into Spark ML. Currently, these are the Stacked Autoencoder and tensor data flow. Highlights of the library:
3 | - Provides the Spark ML pipeline API
4 | - Implements data-parallel training
5 | - Supports native CPU BLAS
6 | - Employs tensor data flow
7 | - Provides an extensible API for developers of new features
8 |
9 | ## Installation
10 | ### Requirements
11 | - Apache Spark 2.0 or higher
12 | - Java and Scala
13 | - Maven
14 |
15 | ### Build
16 | Clone and compile:
17 | ```
18 | git clone https://github.com/avulanov/scalable-deeplearning.git
19 | cd scalable-deeplearning
20 | sbt assembly (or mvn package)
21 | ```
22 | The jar library will be available in the `target` folder. The `assembly` build includes the optimized numerical processing library netlib-java. Optionally, one can build a plain `package` instead.
23 |
24 | ### Performance configuration
25 | Scaladl uses the [netlib-java](https://github.com/fommil/netlib-java) library for optimized numerical processing with native [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms). All netlib-java classes are included in scaladl.jar, which has to be on the classpath before Spark's own libraries because Spark bundles only a subset of netlib. To do this, set `spark.driver.userClassPathFirst` to `true` in `spark-defaults.conf`.
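For illustration, the setting can be placed in `conf/spark-defaults.conf` or passed with `--conf` when launching Spark; the jar path below is only a placeholder for wherever your scaladl assembly lives:
```
# conf/spark-defaults.conf
spark.driver.userClassPathFirst  true

# or, equivalently, on the command line
./spark-shell --conf spark.driver.userClassPathFirst=true --jars /path/to/scaladl.jar
```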
26 |
27 | If native BLAS libraries are not available at runtime, or scaladl.jar is not first in the classpath, you will see the warning `WARN BLAS: Failed to load implementation from:` and the reference (pure JVM) implementation will be used. A native BLAS library such as OpenBLAS (`libopenblas.so` or `.dll`) or ATLAS (`libatlas.so`) has to be on the library path of all nodes that run Spark. Netlib-java requires the library to be named `libblas.so.3`, so one has to create a symlink; the same applies to `libblas3.dll` on Windows. Setup details for the different platforms are below. With a proper configuration you will see the message `INFO JniLoader: successfully loaded ...netlib-native_system-....`
28 |
29 | ### Linux:
30 | Install a native BLAS library (depending on your distribution):
31 | ```
32 | yum install openblas    # or: apt-get install openblas, or download and compile OpenBLAS
33 | ```
34 | Create a symlink to the native BLAS library within its folder `/your/blas`:
35 | ```
36 | ln -s libopenblas.so libblas.so.3
37 | ```
38 | Add the folder to your library path. Make sure there is no other folder with `libblas.so.3` in your path.
39 | ```
40 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/your/blas
41 | ```
42 | ### Windows:
43 | Copy the following DLLs from the MinGW distribution and from OpenBLAS to a folder `blas`. Make sure they are all the same 64- or 32-bit build. Add that folder to your `PATH` variable.
44 | ```
45 | libquadmath-0.dll    // MinGW
46 | libgcc_s_seh-1.dll   // MinGW
47 | libgfortran-3.dll    // MinGW
48 | libopenblas.dll      // OpenBLAS binary
49 | liblapack3.dll       // copy of libopenblas.dll
50 | libblas3.dll         // copy of libopenblas.dll
51 | ```
52 | - MinGW https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win64/Automated%20Builds/
53 | - OpenBLAS http://www.openblas.net/
54 |
55 | ## Example of use
56 | ### Built-in examples
57 | Scaladl provides working examples of MNIST classification and of pre-training with a stacked autoencoder. The examples are in the [`scaladl.examples`](https://github.com/avulanov/scalable-deeplearning/tree/master/src/main/scala/scaladl/examples) package. They can be run via spark-submit:
58 | ```
59 | ./spark-submit --class scaladl.examples.MnistClassification --master spark://master:7077 /path/to/scaladl.jar /path/to/mnist-libsvm
60 | ```
61 | ### Spark shell
62 | Start the Spark shell with this library:
63 | ```
64 | ./spark-shell --jars scaladl.jar
65 | ```
66 | Or use it as an external dependency for your application.
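If you prefer declaring scaladl as an external dependency rather than passing the assembly jar explicitly, a minimal sbt sketch is below. The resolver mirrors `project/plugins.sbt`; the artifact coordinates and version are assumptions based on `build.sbt` (`spName := "avulanov/scalable-deeplearning"`), so check the published package before relying on them:
```
// build.sbt of your application (sketch; coordinates are assumptions)
resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"  % "2.0.0" % "provided",
  "org.apache.spark" %% "spark-mllib" % "2.0.0" % "provided",
  // hypothetical coordinates for the scaladl spark-package
  "avulanov" % "scalable-deeplearning" % "1.0.0"
)
```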
67 |
68 | ### Multilayer perceptron
69 | MNIST classification:
70 | - Load the MNIST handwritten digit recognition data stored in [LIBSVM format](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html) as a DataFrame
71 | - Initialize the multilayer perceptron classifier with 784 inputs, 32 neurons in the hidden layer and 10 outputs
72 | - Train and predict
73 |
74 | ```scala
75 | import org.apache.spark.ml.scaladl.MultilayerPerceptronClassifier
76 | val train = spark.read.format("libsvm").option("numFeatures", 784).load("mnist.scale").persist()
77 | val test = spark.read.format("libsvm").option("numFeatures", 784).load("mnist.scale.t").persist()
78 | train.count() // materialize the lazily persisted data in memory
79 | test.count() // materialize the lazily persisted data in memory
80 | val trainer = new MultilayerPerceptronClassifier().setLayers(Array(784, 32, 10)).setMaxIter(100)
81 | val model = trainer.fit(train)
82 | val result = model.transform(test)
83 | ```
84 | ### Stacked Autoencoder
85 | Pre-training:
86 | - Load the MNIST data
87 | - Initialize the stacked autoencoder with 784 inputs and 32 neurons in the hidden layer
88 | - Train the stacked autoencoder
89 | - Initialize the multilayer perceptron classifier with 784 inputs, 32 neurons in the hidden layer and 10 outputs, copy the encoder weights into its initial weights, and train it
90 | ```scala
91 | import org.apache.spark.ml.scaladl.{MultilayerPerceptronClassifier, StackedAutoencoder}
92 | val train = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTrain).persist()
93 | train.count()
94 | val stackedAutoencoder = new StackedAutoencoder().setLayers(Array(784, 32))
95 |   .setInputCol("features")
96 |   .setOutputCol("output")
97 |   .setDataIn01Interval(true)
98 |   .setBuildDecoder(false)
99 | val saModel = stackedAutoencoder.fit(train)
100 | val autoWeights = saModel.encoderWeights
101 | val trainer = new MultilayerPerceptronClassifier().setLayers(Array(784, 32, 10)).setMaxIter(1)
102 | val initialWeights = trainer.fit(train).weights
103 | System.arraycopy(autoWeights.toArray, 0, initialWeights.toArray, 0, autoWeights.toArray.length)
104 | trainer.setInitialWeights(initialWeights).setMaxIter(10)
105 | val model = trainer.fit(train)
106 | ```
107 | ## Contributions
108 | Contributions are welcome, in particular in the following areas:
109 | - New layers
110 |   - Convolutional
111 |   - ReLU
112 | - Flexibility
113 |   - Implement a reader for Caffe or other deep learning configuration formats
114 |   - Implement Python/R/Java interfaces
115 | - Efficiency
116 |   - Switch from double to single precision
117 |   - Implement wrappers for specialized deep learning libraries, e.g.
TensorFlow 118 | - Refactoring 119 | - Implement own version of L-BFGS to remove dependency on breeze 120 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | name := "scalable-deeplearning" 2 | 3 | version := "1.0.0" 4 | 5 | scalaVersion := "2.11.7" 6 | 7 | spName := "avulanov/scalable-deeplearning" 8 | 9 | spShade := true 10 | 11 | sparkVersion := "2.0.0" 12 | 13 | libraryDependencies ++= Seq( 14 | "com.github.fommil.netlib" % "all" % "1.1.2", 15 | "org.scalatest" % "scalatest_2.11" % "2.2.4" % "test" 16 | ) 17 | 18 | sparkComponents += "mllib" 19 | 20 | // libraryDependencies ++= Seq( 21 | // "org.apache.spark" % "spark-core_2.11" % "2.0.0" % "provided", 22 | // "org.apache.spark" % "spark-mllib_2.11" % "2.0.0" % "provided" 23 | // ) 24 | 25 | test in assembly := {} 26 | 27 | assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | scaladl 4 | scaladl 5 | 1.0.0 6 | ${project.artifactId} 7 | Scalable implementation of Deep Learning for Spark 8 | 2016 9 | 10 | 11 | Apache 12 | http://.... 13 | repo 14 | 15 | 16 | 17 | 18 | 1.6 19 | 1.6 20 | UTF-8 21 | 2.11.5 22 | 2.11 23 | 24 | 25 | 26 | 27 | org.scala-lang 28 | scala-library 29 | ${scala.version} 30 | provided 31 | 32 | 33 | com.github.fommil.netlib 34 | all 35 | 1.1.2 36 | pom 37 | 38 | 39 | org.apache.spark 40 | spark-core_2.11 41 | 2.0.0 42 | provided 43 | 44 | 45 | org.apache.spark 46 | spark-mllib_2.11 47 | 2.0.0 48 | provided 49 | 50 | 51 | 52 | 53 | junit 54 | junit 55 | 4.11 56 | test 57 | 58 | 59 | org.specs2 60 | specs2-core_${scala.compat.version} 61 | 2.4.16 62 | test 63 | 64 | 65 | org.scalatest 66 | scalatest_${scala.compat.version} 67 | 2.2.4 68 | test 69 | 70 | 71 | 72 | 73 | src/main/scala 74 | src/test/scala 75 | 76 | 77 | 78 | net.alchim31.maven 79 | scala-maven-plugin 80 | 3.2.0 81 | 82 | 83 | 84 | compile 85 | testCompile 86 | 87 | 88 | 89 | -dependencyfile 90 | ${project.build.directory}/.scala_dependencies 91 | 92 | 93 | 94 | 95 | 96 | 97 | org.apache.maven.plugins 98 | maven-surefire-plugin 99 | 2.18.1 100 | 101 | false 102 | true 103 | 104 | 105 | 106 | **/*Test.* 107 | **/*Suite.* 108 | 109 | 110 | 111 | 112 | org.scalastyle 113 | scalastyle-maven-plugin 114 | 0.8.0 115 | 116 | false 117 | true 118 | false 119 | false 120 | ${basedir}/src/main/scala 121 | ${basedir}/src/test/scala 122 | scalastyle-config.xml 123 | ${basedir}/target/scalastyle-output.xml 124 | UTF-8 125 | UTF-8 126 | 127 | 128 | 129 | 130 | check 131 | 132 | 133 | 134 | 135 | 136 | maven-assembly-plugin 137 | 2.6 138 | 139 | 140 | 141 | scaladl.examples.MnistClassification 142 | 143 | 144 | 145 | jar-with-dependencies 146 | 147 | false 148 | 149 | 150 | 151 | package 152 | 153 | single 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.2") 2 | 3 | resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/" 4 | 5 | addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.5") 
-------------------------------------------------------------------------------- /scalastyle-config.xml: --------------------------------------------------------------------------------
[The XML markup of this Scalastyle configuration was lost in extraction; only text fragments survive. Recoverable content: the "Scalastyle standard configuration" enforces whitespace around tokens such as ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW; bans ^FunSuite[A-Za-z]*$ ("Tests must extend org.apache.spark.SparkFunSuite instead."), ^println$, @VisibleForTesting, Runtime.getRuntime.addShutdownHook, mutable.SynchronizedBuffer, Class.forName, Await.result, scala.collection.JavaConversions (import scala.collection.JavaConverters._ and use .asScala / .asJava methods), and org.apache.commons.lang (use Commons Lang 3 classes in org.apache.commons.lang3.*); requires Javadoc-style indentation for multiline comments, omitting braces in case clauses, and the override modifier instead of @java.lang.Override; orders imports into the groups java, scala, 3rdParty, spark with the patterns javax?\..*, scala\..*, (?!scaladl\.).*, scaladl\..*; and sets numeric limits of 800, 30, 10, and 50 with allowed magic numbers -1,0,1,2,3.]
-------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/ml/scaladl/MultilayerPerceptronClassifier.scala: --------------------------------------------------------------------------------
1 | /*
2 |  * Licensed to the Apache Software Foundation (ASF) under one or more
3 |  * contributor license agreements. See the NOTICE file distributed with
4 |  * this work for additional information regarding copyright ownership.
5 |  * The ASF licenses this file to You under the Apache License, Version 2.0
6 |  * (the "License"); you may not use this file except in compliance with
7 |  * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import scala.collection.JavaConverters._ 21 | 22 | import org.apache.spark.ml.{PredictionModel, Predictor, PredictorParams} 23 | import org.apache.spark.ml.feature.LabeledPoint 24 | import org.apache.spark.ml.linalg.{Vector, Vectors} 25 | import org.apache.spark.ml.param.{DoubleParam, IntArrayParam, IntParam, Param, ParamMap, 26 | ParamValidators} 27 | import org.apache.spark.ml.param.shared.{HasMaxIter, HasSeed, HasTol} 28 | import org.apache.spark.ml.util.Identifiable 29 | import org.apache.spark.sql.Dataset 30 | 31 | import scaladl.layers.{FeedForwardTopology, FeedForwardTrainer} 32 | 33 | /** 34 | * Params for Multilayer Perceptron. 35 | */ 36 | private[ml] trait MultilayerPerceptronParams extends PredictorParams 37 | with HasSeed with HasMaxIter with HasTol { 38 | /** 39 | * Layer sizes including input size and output size. 40 | * Default: Array(1, 1) 41 | * 42 | * @group param 43 | */ 44 | final val layers: IntArrayParam = new IntArrayParam(this, "layers", 45 | "Sizes of layers from input layer to output layer" + 46 | " E.g., Array(780, 100, 10) means 780 inputs, " + 47 | "one hidden layer with 100 neurons and output layer of 10 neurons.", 48 | // TODO: how to check ALSO that all elements are greater than 0? 49 | ParamValidators.arrayLengthGt(1) 50 | ) 51 | 52 | /** @group getParam */ 53 | final def getLayers: Array[Int] = $(layers) 54 | 55 | /** 56 | * Block size for stacking input data in matrices to speed up the computation. 57 | * Data is stacked within partitions. If block size is more than remaining data in 58 | * a partition then it is adjusted to the size of this data. 59 | * Recommended size is between 10 and 1000. 60 | * Default: 128 61 | * 62 | * @group expertParam 63 | */ 64 | final val blockSize: IntParam = new IntParam(this, "blockSize", 65 | "Block size for stacking input data in matrices. Data is stacked within partitions." + 66 | " If block size is more than remaining data in a partition then " + 67 | "it is adjusted to the size of this data. Recommended size is between 10 and 1000", 68 | ParamValidators.gt(0)) 69 | 70 | /** @group getParam */ 71 | final def getBlockSize: Int = $(blockSize) 72 | 73 | /** 74 | * Optimizer setup. 75 | * 76 | * @group expertParam 77 | */ 78 | final val optimizer: Param[String] = new Param[String](this, "optimizer", 79 | " Allows setting the optimizer: minibatch gradient descent (GD) or LBFGS. " + 80 | " The latter is recommended one. ", 81 | ParamValidators.inArray[String](Array("GD", "LBFGS"))) 82 | 83 | /** @group getParam */ 84 | final def getOptimizer: String = $(optimizer) 85 | 86 | /** 87 | * Learning rate. 88 | * 89 | * @group expertParam 90 | */ 91 | final val learningRate: DoubleParam = new DoubleParam(this, "learning rate", 92 | " Sets the learning rate for gradient descent optimizer ", 93 | ParamValidators.inRange(0, 1)) 94 | 95 | /** @group getParam */ 96 | final def getLearningRate: Double = $(learningRate) 97 | 98 | 99 | /** 100 | * The initial weights of the model. 
101 | * 102 | * @group expertParam 103 | */ 104 | final val initialWeights: Param[Vector] = new Param[Vector](this, "initialWeights", 105 | "The initial weights of the model") 106 | 107 | /** @group expertGetParam */ 108 | final def getInitialWeights: Vector = $(initialWeights) 109 | 110 | setDefault(maxIter -> 100, tol -> 1e-6, layers -> Array(1, 1), blockSize -> 128, 111 | optimizer -> "LBFGS", learningRate -> 0.03) 112 | } 113 | 114 | /** Label to vector converter. */ 115 | private object LabelConverter { 116 | // TODO: Use OneHotEncoder instead 117 | /** 118 | * Encodes a label as a vector. 119 | * Returns a vector of given length with zeroes at all positions 120 | * and value 1.0 at the position that corresponds to the label. 121 | * 122 | * @param labeledPoint labeled point 123 | * @param labelCount total number of labels 124 | * @return pair of features and vector encoding of a label 125 | */ 126 | def encodeLabeledPoint(labeledPoint: LabeledPoint, labelCount: Int): (Vector, Vector) = { 127 | val output = Array.fill(labelCount)(0.0) 128 | output(labeledPoint.label.toInt) = 1.0 129 | (labeledPoint.features, Vectors.dense(output)) 130 | } 131 | 132 | /** 133 | * Converts a vector to a label. 134 | * Returns the position of the maximal element of a vector. 135 | * 136 | * @param output label encoded with a vector 137 | * @return label 138 | */ 139 | def decodeLabel(output: Vector): Double = { 140 | output.argmax.toDouble 141 | } 142 | } 143 | 144 | /** 145 | * Classifier trainer based on the Multilayer Perceptron. 146 | * Each layer has sigmoid activation function, output layer has softmax. 147 | * Number of inputs has to be equal to the size of feature vectors. 148 | * Number of outputs has to be equal to the total number of labels. 149 | * 150 | */ 151 | class MultilayerPerceptronClassifier (override val uid: String) 152 | extends Predictor[Vector, MultilayerPerceptronClassifier, MultilayerPerceptronClassificationModel] 153 | with MultilayerPerceptronParams { 154 | 155 | def this() = this(Identifiable.randomUID("mlpc-scaladl")) 156 | 157 | /** @group setParam */ 158 | def setLayers(value: Array[Int]): this.type = set(layers, value) 159 | 160 | /** @group setParam */ 161 | def setBlockSize(value: Int): this.type = set(blockSize, value) 162 | 163 | /** 164 | * Set the maximum number of iterations. 165 | * Default is 100. 166 | * 167 | * @group setParam 168 | */ 169 | def setMaxIter(value: Int): this.type = set(maxIter, value) 170 | 171 | /** 172 | * Set the convergence tolerance of iterations. 173 | * Smaller value will lead to higher accuracy with the cost of more iterations. 174 | * Default is 1E-4. 175 | * 176 | * @group setParam 177 | */ 178 | def setTol(value: Double): this.type = set(tol, value) 179 | 180 | /** 181 | * Set the seed for weights initialization if weights are not set 182 | * 183 | * @group setParam 184 | */ 185 | def setSeed(value: Long): this.type = set(seed, value) 186 | 187 | /** 188 | * Sets the value of param [[initialWeights]]. 189 | * 190 | * @group expertSetParam 191 | */ 192 | def setInitialWeights(value: Vector): this.type = set(initialWeights, value) 193 | 194 | /** 195 | * Generate weights. 196 | */ 197 | def generateWeights(): Vector = { 198 | val topology = FeedForwardTopology.multiLayerPerceptron($(layers), true) 199 | topology.model($(seed)).weights 200 | } 201 | 202 | override def copy(extra: ParamMap): MultilayerPerceptronClassifier = defaultCopy(extra) 203 | 204 | /** 205 | * Train a model using the given dataset and parameters. 
206 | * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation 207 | * and copying parameters into the model. 208 | * 209 | * @param dataset Training dataset 210 | * @return Fitted model 211 | */ 212 | override protected def train(dataset: Dataset[_]): MultilayerPerceptronClassificationModel = { 213 | val myLayers = $(layers) 214 | val labels = myLayers.last 215 | val lpData = extractLabeledPoints(dataset) 216 | val data = lpData.map(lp => LabelConverter.encodeLabeledPoint(lp, labels)) 217 | val topology = FeedForwardTopology.multiLayerPerceptron(myLayers, true) 218 | val trainer = new FeedForwardTrainer(topology, myLayers(0), myLayers.last) 219 | if (isDefined(initialWeights)) { 220 | trainer.setWeights($(initialWeights)) 221 | } else { 222 | trainer.setSeed($(seed)) 223 | } 224 | trainer.LBFGSOptimizer 225 | .setConvergenceTol($(tol)) 226 | .setNumIterations($(maxIter)) 227 | trainer.setStackSize($(blockSize)) 228 | val mlpModel = trainer.train(data) 229 | new MultilayerPerceptronClassificationModel(uid, myLayers, mlpModel.weights) 230 | } 231 | } 232 | 233 | /** 234 | * Classification model based on the Multilayer Perceptron. 235 | * Each layer has sigmoid activation function, output layer has softmax. 236 | * 237 | * @param uid uid 238 | * @param layers array of layer sizes including input and output layers 239 | * @param weights vector of initial weights for the model that consists of the weights of layers 240 | * @return prediction model 241 | */ 242 | class MultilayerPerceptronClassificationModel private[ml] (override val uid: String, 243 | val layers: Array[Int], 244 | val weights: Vector) 245 | extends PredictionModel[Vector, MultilayerPerceptronClassificationModel] 246 | with Serializable { 247 | 248 | override val numFeatures: Int = layers.head 249 | 250 | private val mlpModel = FeedForwardTopology.multiLayerPerceptron(layers, true).model(weights) 251 | 252 | /** 253 | * Returns layers in a Java List. 254 | */ 255 | private[ml] def javaLayers: java.util.List[Int] = { 256 | layers.toList.asJava 257 | } 258 | 259 | /** 260 | * Predict label for the given features. 261 | * This internal method is used to implement [[transform()]] and output [[predictionCol]]. 262 | */ 263 | override protected def predict(features: Vector): Double = { 264 | LabelConverter.decodeLabel(mlpModel.predict(features)) 265 | } 266 | 267 | override def copy(extra: ParamMap): MultilayerPerceptronClassificationModel = { 268 | copyValues(new MultilayerPerceptronClassificationModel(uid, layers, weights), extra) 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/ml/scaladl/StackedAutoencoder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import breeze.linalg.{DenseVector => BDV} 21 | import org.apache.spark.ml.{Estimator, Model} 22 | import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} 23 | import org.apache.spark.ml.param.{BooleanParam, ParamMap, Params} 24 | import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} 25 | import org.apache.spark.ml.util.Identifiable 26 | import org.apache.spark.sql.{DataFrame, Dataset, Row} 27 | import org.apache.spark.sql.functions._ 28 | import org.apache.spark.sql.types.{StructField, StructType} 29 | import org.apache.spark.storage.StorageLevel 30 | 31 | import scaladl.layers.{EmptyLayerWithSquaredError, FeedForwardTopology, FeedForwardTrainer} 32 | 33 | /** 34 | * Params for [[StackedAutoencoder]]. 35 | */ 36 | private[scaladl] trait StackedAutoencoderParams extends Params with HasInputCol with HasOutputCol { 37 | /** 38 | * True if data is in [0, 1] interval. 39 | * Default: false 40 | * 41 | * @group expertParam 42 | */ 43 | final val dataIn01Interval: BooleanParam = new BooleanParam(this, "dataIn01Interval", 44 | "True if data is in [0, 1] interval." + 45 | " Sets the layer on the top of the autoencoder: linear + sigmoid (true) " + 46 | " or linear (false)") 47 | 48 | /** @group getParam */ 49 | final def getDataIn01Interval: Boolean = $(dataIn01Interval) 50 | 51 | /** 52 | * True if one wants to have decoder. 53 | * Default: false 54 | * 55 | * @group expertParam 56 | */ 57 | final val buildDecoder: BooleanParam = new BooleanParam(this, "buildDecoder", 58 | "True to produce a decoder.") 59 | 60 | /** @group getParam */ 61 | final def getBuildDecoder: Boolean = $(buildDecoder) 62 | 63 | /** 64 | * True to cache the intermediate data in memory. Otherwise disk caching is used. 65 | * Default: true 66 | * 67 | * @group expertParam 68 | */ 69 | final val memoryOnlyCaching: BooleanParam = new BooleanParam(this, "memoryOnlyCaching", 70 | "True to cache the intermediate data in memory only.") 71 | 72 | /** @group getParam */ 73 | final def getMemoryOnlyCaching: Boolean = $(memoryOnlyCaching) 74 | 75 | setDefault(dataIn01Interval -> true, buildDecoder -> false, memoryOnlyCaching -> true) 76 | } 77 | 78 | class StackedAutoencoder (override val uid: String) 79 | extends Estimator[StackedAutoencoderModel] 80 | with MultilayerPerceptronParams with StackedAutoencoderParams { 81 | 82 | def this() = this(Identifiable.randomUID("stackedAutoencoder")) 83 | 84 | /** @group setParam */ 85 | def setDataIn01Interval(value: Boolean): this.type = set(dataIn01Interval, value) 86 | 87 | /** @group setParam */ 88 | def setBuildDecoder(value: Boolean): this.type = set(buildDecoder, value) 89 | 90 | // TODO: make sure that user understands how to set it. Make correctness check 91 | /** @group setParam */ 92 | def setLayers(value: Array[Int]): this.type = set(layers, value) 93 | 94 | /** @group setParam */ 95 | def setBlockSize(value: Int): this.type = set(blockSize, value) 96 | 97 | /** @group setParam */ 98 | def setInputCol(value: String): this.type = set(inputCol, value) 99 | 100 | /** @group setParam */ 101 | def setOutputCol(value: String): this.type = set(outputCol, value) 102 | 103 | /** 104 | * Set the maximum number of iterations. 105 | * Default is 100. 
106 | * 107 | * @group setParam 108 | */ 109 | def setMaxIter(value: Int): this.type = set(maxIter, value) 110 | 111 | /** 112 | * Set the convergence tolerance of iterations. 113 | * Smaller value will lead to higher accuracy with the cost of more iterations. 114 | * Default is 1E-4. 115 | * 116 | * @group setParam 117 | */ 118 | def setTol(value: Double): this.type = set(tol, value) 119 | 120 | /** 121 | * Set the seed for weights initialization. 122 | * 123 | * @group setParam 124 | */ 125 | def setSeed(value: Long): this.type = set(seed, value) 126 | 127 | /** 128 | * Set the model weights. 129 | * 130 | * @group setParam 131 | */ 132 | def setInitialWeights(value: Vector): this.type = set(initialWeights, value) 133 | 134 | /** 135 | * Fits a model to the input data. 136 | */ 137 | override def fit(dataset: Dataset[_]): StackedAutoencoderModel = { 138 | val storageLevel = 139 | if ($(memoryOnlyCaching)) StorageLevel.MEMORY_ONLY else StorageLevel.DISK_ONLY 140 | var stackedEncoderOffset = 0 141 | val stackedEncoderWeights = if (!this.isSet(this.initialWeights)) { 142 | val size = 143 | FeedForwardTopology.multiLayerPerceptron($(layers)).layers.foldLeft(0)( (b, layer) => 144 | b + layer.weightSize) 145 | new Array[Double](size) 146 | } else { 147 | $(initialWeights).toArray 148 | } 149 | // decoder if needed 150 | var stackedDecoderOffset = 0 151 | val decoderLayers = $(layers).reverse 152 | val stackedDecoderWeights: Array[Double] = if ($(buildDecoder)) { 153 | val size = 154 | FeedForwardTopology.multiLayerPerceptron(decoderLayers).layers.foldLeft(0)( (b, layer) => 155 | b + layer.weightSize) 156 | stackedDecoderOffset = size 157 | new Array[Double](size) 158 | } else { 159 | new Array[Double](0) 160 | } 161 | // TODO: use single instance of vectors 162 | var data = dataset.select($(inputCol)).rdd.map { case Row(x: Vector) => (x, x) } 163 | var previousData = data 164 | val linearInput = !$(dataIn01Interval) 165 | // Train autoencoder for each layer except the last 166 | for (i <- 0 until $(layers).length - 1) { 167 | val currentLayers = Array($(layers)(i), $(layers)(i + 1), $(layers)(i)) 168 | val currentTopology = FeedForwardTopology.multiLayerPerceptron(currentLayers, false) 169 | val isLastLayer = i == $(layers).length - 2 170 | val isFirstLayer = i == 0 171 | if (isFirstLayer && linearInput) { 172 | currentTopology.layers(currentTopology.layers.length - 1) = new EmptyLayerWithSquaredError() 173 | } 174 | val FeedForwardTrainer = 175 | new FeedForwardTrainer(currentTopology, currentLayers(0), currentLayers.last) 176 | .setStackSize($(blockSize)) 177 | .setSeed($(seed)) 178 | FeedForwardTrainer.LBFGSOptimizer 179 | .setConvergenceTol($(tol)) 180 | .setNumIterations($(maxIter)) 181 | val currentModel = FeedForwardTrainer.train(data) 182 | val currentWeights = currentModel.weights.toArray 183 | val encoderWeightSize = currentTopology.layers(0).weightSize 184 | System.arraycopy( 185 | currentWeights, 0, stackedEncoderWeights, stackedEncoderOffset, encoderWeightSize) 186 | stackedEncoderOffset += encoderWeightSize 187 | // input data for the next autoencoder in the stack 188 | if (!isLastLayer) { // intermediate layers 189 | val encoderTopology = FeedForwardTopology.multiLayerPerceptron(currentLayers.init, false) 190 | // Due to Vector inefficiency it will copy weights 191 | val encoderModel = encoderTopology.model( 192 | Vectors.fromBreeze(new BDV[Double](currentWeights, 0, 1, encoderWeightSize))) 193 | // TODO: perform block operations 194 | previousData = data 195 | data = data.map { 
x => 196 | val y = encoderModel.predict(x._1) 197 | (y, y) 198 | } 199 | // persist and materialize the intermediate data 200 | data.persist(storageLevel) 201 | data.count() 202 | // unpersist the data that is persisted inside the loop 203 | if (!isFirstLayer) previousData.unpersist() 204 | } else { // last layer 205 | // unpersist the data that remains from the last intermediate layer 206 | if (!isFirstLayer) data.unpersist() 207 | } 208 | // if needs decoder 209 | if ($(buildDecoder)) { 210 | val decoderWeightSize = currentWeights.length - encoderWeightSize 211 | stackedDecoderOffset -= decoderWeightSize 212 | System.arraycopy(currentWeights, encoderWeightSize, stackedDecoderWeights, 213 | stackedDecoderOffset, decoderWeightSize) 214 | } 215 | } 216 | new StackedAutoencoderModel(uid + "model", $(layers), Vectors.dense(stackedEncoderWeights), 217 | Vectors.dense(stackedDecoderWeights), linearInput) 218 | } 219 | 220 | override def copy(extra: ParamMap): Estimator[StackedAutoencoderModel] = defaultCopy(extra) 221 | 222 | /** 223 | * :: DeveloperApi :: 224 | * 225 | * Derives the output schema from the input schema. 226 | */ 227 | override def transformSchema(schema: StructType): StructType = { 228 | val inputType = schema($(inputCol)).dataType 229 | require(inputType.isInstanceOf[VectorUDT], 230 | s"Input column ${$(inputCol)} must be a vector column") 231 | require(!schema.fieldNames.contains($(outputCol)), 232 | s"Output column ${$(outputCol)} already exists.") 233 | val outputFields = schema.fields :+ StructField($(outputCol), new VectorUDT, false) 234 | StructType(outputFields) 235 | } 236 | } 237 | 238 | class StackedAutoencoderModel private[ml] ( 239 | override val uid: String, 240 | val layers: Array[Int], 241 | val encoderWeights: Vector, 242 | val decoderWeights: Vector, 243 | linearOutput: Boolean) extends Model[StackedAutoencoderModel] with StackedAutoencoderParams { 244 | 245 | /** @group setParam */ 246 | def setInputCol(value: String): this.type = set(inputCol, value) 247 | 248 | /** @group setParam */ 249 | def setOutputCol(value: String): this.type = set(outputCol, value) 250 | 251 | private val encoderModel = { 252 | val topology = FeedForwardTopology.multiLayerPerceptron(layers, false) 253 | topology.model(encoderWeights) 254 | } 255 | 256 | private val decoderModel = { 257 | if (decoderWeights != null && decoderWeights.size > 0) { 258 | val topology = FeedForwardTopology.multiLayerPerceptron(layers.reverse, false) 259 | if (linearOutput) { 260 | topology.layers(topology.layers.length - 1) = new EmptyLayerWithSquaredError() 261 | } 262 | topology.model(decoderWeights) 263 | } else { 264 | null 265 | } 266 | } 267 | 268 | override def copy(extra: ParamMap): StackedAutoencoderModel = { 269 | copyValues( 270 | new StackedAutoencoderModel(uid, layers, encoderWeights, decoderWeights, linearOutput), extra) 271 | } 272 | 273 | /** 274 | * Transforms the input dataset. 
275 | */ 276 | override def transform(dataset: Dataset[_]): DataFrame = { 277 | transformSchema(dataset.schema, logging = true) 278 | val pcaOp = udf { encoderModel.predict _ } 279 | dataset.withColumn($(outputCol), pcaOp(col($(inputCol)))) 280 | } 281 | 282 | def encode(dataset: DataFrame): DataFrame = transform(dataset) 283 | 284 | def decode(dataset: DataFrame): DataFrame = { 285 | // TODO: show something if no decoder 286 | transformSchema(dataset.schema, logging = true) 287 | val pcaOp = udf { decoderModel.predict _ } 288 | dataset.withColumn($(outputCol), pcaOp(col($(inputCol)))) 289 | } 290 | 291 | /** 292 | * :: DeveloperApi :: 293 | * 294 | * Derives the output schema from the input schema. 295 | */ 296 | override def transformSchema(schema: StructType): StructType = { 297 | val inputType = schema($(inputCol)).dataType 298 | require(inputType.isInstanceOf[VectorUDT], 299 | s"Input column ${$(inputCol)} must be a vector column") 300 | require(!schema.fieldNames.contains($(outputCol)), 301 | s"Output column ${$(outputCol)} already exists.") 302 | val outputFields = schema.fields :+ StructField($(outputCol), new VectorUDT, false) 303 | StructType(outputFields) 304 | } 305 | } 306 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/examples/MnistClassification.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.examples 19 | 20 | import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator 21 | import org.apache.spark.ml.scaladl.MultilayerPerceptronClassifier 22 | import org.apache.spark.sql.SparkSession 23 | 24 | object MnistClassification { 25 | 26 | def main(args: Array[String]): Unit = { 27 | if (args.length != 1) { 28 | System.exit(0) 29 | } 30 | val mnistPath = args(0) 31 | val spark = SparkSession.builder 32 | .appName("my-spark-app") 33 | .config("spark.sql.warehouse.dir", "warehouse-temp") 34 | .getOrCreate() 35 | val mnistTrain = mnistPath + "/mnist.scale" 36 | val mnistTest = mnistPath + "/mnist.scale.t" 37 | // Load the data stored in LIBSVM format as a DataFrame. 
38 | // MNIST handwritten recognition data 39 | // https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html 40 | val train = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTrain).persist() 41 | val test = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTest).persist() 42 | // materialize data lazily persisted in memory 43 | train.count() 44 | test.count() 45 | // specify layers for the neural network: 46 | // input layer of size 784 (features), one hidden layer of size 100 47 | // and output of size 10 (classes) 48 | val layers = Array[Int](784, 32, 10) 49 | // create the trainer and set its parameters 50 | val trainer = new MultilayerPerceptronClassifier() 51 | .setLayers(layers) 52 | .setBlockSize(128) 53 | .setSeed(1234L) 54 | .setMaxIter(100) 55 | // train the model 56 | val model = trainer.fit(train) 57 | // compute accuracy on the test set 58 | val result = model.transform(test) 59 | val predictionAndLabels = result.select("prediction", "label") 60 | val evaluator = new MulticlassClassificationEvaluator() 61 | .setMetricName("accuracy") 62 | // scalastyle:off 63 | println("Accuracy: " + evaluator.evaluate(predictionAndLabels)) 64 | // scalastyle:on 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/examples/MnistEncoding.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.examples 19 | 20 | import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator 21 | import org.apache.spark.ml.scaladl.{MultilayerPerceptronClassifier, StackedAutoencoder} 22 | import org.apache.spark.sql.SparkSession 23 | 24 | object MnistEncoding { 25 | 26 | def main(args: Array[String]): Unit = { 27 | if (args.length != 1) { 28 | System.exit(0) 29 | } 30 | val mnistPath = args(0) 31 | val spark = SparkSession.builder 32 | .appName("my-spark-app") 33 | .config("spark.sql.warehouse.dir", "warehouse-temp") 34 | .getOrCreate() 35 | val mnistTrain = mnistPath + "/mnist.scale" 36 | val mnistTest = mnistPath + "/mnist.scale.t" 37 | // Load the data stored in LIBSVM format as a DataFrame. 
38 | // MNIST handwritten recognition data 39 | // https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html 40 | val train = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTrain).persist() 41 | val test = spark.read.format("libsvm").option("numFeatures", 784).load(mnistTest).persist() 42 | // materialize data lazily persisted in memory 43 | train.count() 44 | test.count() 45 | // specify layers for the neural network: 46 | // input layer of size 784 (features), one hidden layer of size 100 47 | // and output of size 10 (classes) 48 | val layers = Array[Int](784, 32, 10) 49 | // create autoencoder and decode with one hidden layer of 32 neurons 50 | val stackedAutoencoder = new StackedAutoencoder() 51 | .setLayers(layers.init) 52 | .setBlockSize(128) 53 | .setMaxIter(1) 54 | .setSeed(333L) 55 | .setTol(1e-6) 56 | .setInputCol("features") 57 | .setOutputCol("output") 58 | .setDataIn01Interval(true) 59 | .setBuildDecoder(false) 60 | val saModel = stackedAutoencoder.fit(train) 61 | val autoWeights = saModel.encoderWeights 62 | val trainer = new MultilayerPerceptronClassifier() 63 | .setLayers(layers) 64 | .setBlockSize(128) 65 | .setSeed(123456789L) 66 | .setMaxIter(1) 67 | .setTol(1e-6) 68 | val initialWeights = trainer.fit(train).weights 69 | System.arraycopy( 70 | autoWeights.toArray, 0, initialWeights.toArray, 0, autoWeights.toArray.length) 71 | trainer 72 | .setInitialWeights(initialWeights) 73 | .setMaxIter(10) 74 | .setTol(1e-6) 75 | val model = trainer.fit(train) 76 | val result = model.transform(test) 77 | val predictionAndLabels = result.select("prediction", "label") 78 | val evaluator = new MulticlassClassificationEvaluator() 79 | .setMetricName("accuracy") 80 | // scalastyle:off 81 | println("Accuracy: " + evaluator.evaluate(predictionAndLabels)) 82 | // scalastyle:on 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/layers/Layer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.layers 19 | 20 | import java.util.Random 21 | 22 | import org.apache.spark.ml.linalg.{Vector, Vectors} 23 | import org.apache.spark.rdd.RDD 24 | 25 | import scaladl.layers.AnnTypes._ 26 | import scaladl.optimization._ 27 | import scaladl.tensor.DenseTensor 28 | 29 | object AnnTypes { 30 | type Tensor = DenseTensor[Double] 31 | } 32 | 33 | /** 34 | * Trait that holds Layer properties, that are needed to instantiate it. 35 | * Implements Layer instantiation. 
36 | * 37 | */ 38 | private[layers] trait Layer extends Serializable { 39 | 40 | /** 41 | * Number of weights that is used to allocate memory for the weights vector 42 | */ 43 | val weightSize: Int 44 | 45 | /** 46 | * Returns the output size given the input size (not counting the stack size). 47 | * Output size is used to allocate memory for the output. 48 | * 49 | * @param inputSize input size 50 | * @return output size 51 | */ 52 | def outputSize(inputSize: Int): Int 53 | 54 | /** 55 | * If true, the memory is not allocated for the output of this layer. 56 | * The memory allocated to the previous layer is used to write the output of this layer. 57 | * Developer can set this to true if computing delta of a previous layer 58 | * does not involve its output, so the current layer can write there. 59 | * This also mean that both layers have the same number of outputs. 60 | */ 61 | val inPlace: Boolean 62 | 63 | /** 64 | * Returns the instance of the layer based on weights provided. 65 | * Size of weights must be equal to weightSize 66 | * 67 | * @param weights vector with layer weights 68 | * @return the layer model 69 | */ 70 | def model(weights: Tensor): LayerModel 71 | /** 72 | * Returns the instance of the layer with random generated weights 73 | * 74 | * @param weights vector for weights initialization, must be equal to weightSize 75 | * @param random random number generator 76 | * @return the layer model 77 | */ 78 | def initModel(weights: Tensor, random: Random): LayerModel 79 | } 80 | 81 | /** 82 | * Trait that holds Layer weights (or parameters). 83 | * Implements functions needed for forward propagation, computing delta and gradient. 84 | * Can return weights in Vector format. 85 | */ 86 | private[layers] trait LayerModel extends Serializable { 87 | 88 | val weights: Tensor 89 | /** 90 | * Evaluates the data (process the data through the layer) 91 | * 92 | * @param data data 93 | * @param output output to write to 94 | */ 95 | def eval(data: Tensor, output: Tensor): Unit 96 | 97 | /** 98 | * Computes the delta for back propagation 99 | * 100 | * @param delta delta of this layer 101 | * @param output output of this layer 102 | * @param pDelta storage for the result, the previous delta 103 | * @return delta 104 | */ 105 | def prevDelta(delta: Tensor, output: Tensor, pDelta: Tensor): Unit 106 | 107 | /** 108 | * Computes the gradient 109 | * 110 | * @param delta delta for this layer 111 | * @param input input data 112 | * @param cumGrad cumulative gradient 113 | * @return gradient 114 | */ 115 | def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit 116 | } 117 | 118 | /** 119 | * Layer properties of affine transformations, that is y=A*x+b 120 | * 121 | * @param numIn number of inputs 122 | * @param numOut number of outputs 123 | */ 124 | private[layers] class AffineLayer(val numIn: Int, val numOut: Int) extends Layer { 125 | 126 | override val weightSize = numIn * numOut + numOut 127 | 128 | override def outputSize(inputSize: Int): Int = numOut 129 | 130 | override val inPlace = false 131 | 132 | override def model(weights: Tensor): LayerModel = new AffineLayerModel(weights, this) 133 | 134 | override def initModel(weights: Tensor, random: Random): LayerModel = 135 | AffineLayerModel(this, weights, random) 136 | } 137 | 138 | /** 139 | * Model of Affine layer 140 | * 141 | * @param weights weights 142 | * @param layer layer properties 143 | */ 144 | private[layers] class AffineLayerModel private[layers]( 145 | val weights: Tensor, 146 | val layer: AffineLayer) extends 
LayerModel { 147 | val w = DenseTensor(weights.data, Array(layer.numOut, layer.numIn), weights.offset) 148 | val b = DenseTensor(weights.data, Array(layer.numOut), 149 | weights.offset + (layer.numOut * layer.numIn)) 150 | 151 | private var ones: Tensor = null 152 | 153 | override def eval(data: Tensor, output: Tensor): Unit = { 154 | output.fillWith(b) 155 | DenseTensor.gemm(1.0, w, data, 1.0, output) 156 | } 157 | 158 | override def prevDelta(nextDelta: Tensor, input: Tensor, delta: Tensor): Unit = { 159 | DenseTensor.gemm(1.0, w.transpose, nextDelta, 0.0, delta) 160 | } 161 | 162 | override def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit = { 163 | // compute gradient of weights 164 | val cumGradientOfWeights = DenseTensor(cumGrad.data, w.shape, cumGrad.offset) 165 | DenseTensor.gemm(1.0 / input.shape(1), delta, input.transpose, 1.0, cumGradientOfWeights) 166 | if (ones == null || ones.shape(0) != delta.shape(1)) ones = 167 | DenseTensor.fill(Array(delta.shape(1)))(1) 168 | 169 | // compute gradient of bias 170 | val cumGradientOfBias = DenseTensor(cumGrad.data, Array(b.shape(0)), cumGrad.offset + w.size) 171 | DenseTensor.gemv(1.0 / input.shape(1), delta, ones, 1.0, cumGradientOfBias) 172 | } 173 | } 174 | 175 | /** 176 | * Fabric for Affine layer models 177 | */ 178 | private[layers] object AffineLayerModel { 179 | 180 | /** 181 | * Creates a model of Affine layer 182 | * 183 | * @param layer layer properties 184 | * @param weights vector for weights initialization 185 | * @param random random number generator 186 | * @return model of Affine layer 187 | */ 188 | def apply(layer: AffineLayer, weights: Tensor, random: Random): AffineLayerModel = { 189 | randomWeights(layer.numIn, layer.numOut, weights, random) 190 | new AffineLayerModel(weights, layer) 191 | } 192 | 193 | /** 194 | * Initialize weights 195 | * 196 | * @param numIn number of inputs 197 | * @param numOut number of outputs 198 | * @param weights vector for weights initialization 199 | * @param random random number generator 200 | */ 201 | def randomWeights( 202 | numIn: Int, 203 | numOut: Int, 204 | weights: Tensor, 205 | random: Random): Unit = { 206 | var i = 0 207 | val sz = weights.size 208 | while (i < sz) { 209 | weights.update(i, (random.nextDouble * 4.8 - 2.4) / numIn) 210 | i += 1 211 | } 212 | } 213 | } 214 | 215 | /** 216 | * Trait for functions and their derivatives for functional layers 217 | */ 218 | private[layers] trait ActivationFunction extends Serializable { 219 | 220 | /** 221 | * Implements a function 222 | */ 223 | def eval: Double => Double 224 | 225 | /** 226 | * Implements a derivative of a function (needed for the back propagation) 227 | */ 228 | def derivative: Double => Double 229 | } 230 | 231 | /** 232 | * Implements Sigmoid activation function 233 | */ 234 | private[layers] class SigmoidFunction extends ActivationFunction { 235 | 236 | override def eval: (Double) => Double = x => 1.0 / (1 + Math.exp(-x)) 237 | 238 | override def derivative: (Double) => Double = z => (1 - z) * z 239 | } 240 | 241 | /** 242 | * Functional layer properties, y = f(x) 243 | * 244 | * @param activationFunction activation function 245 | */ 246 | private[layers] class FunctionalLayer(val activationFunction: ActivationFunction) extends Layer { 247 | 248 | override val weightSize = 0 249 | 250 | override def outputSize(inputSize: Int): Int = inputSize 251 | 252 | override val inPlace = true 253 | 254 | override def model(weights: Tensor): LayerModel = new FunctionalLayerModel(this) 255 | 256 | override def 
initModel(weights: Tensor, random: Random): LayerModel = model(weights) 257 | } 258 | 259 | /** 260 | * Functional layer model. Holds no weights. 261 | * 262 | * @param layer functiona layer 263 | */ 264 | private[layers] class FunctionalLayerModel private[layers](val layer: FunctionalLayer) 265 | extends LayerModel { 266 | 267 | // empty weights 268 | val weights: Tensor = DenseTensor(Array(0)) 269 | 270 | override def eval(data: Tensor, output: Tensor): Unit = { 271 | DenseTensor.applyFunction(data, output, layer.activationFunction.eval) 272 | } 273 | 274 | override def prevDelta(nextDelta: Tensor, input: Tensor, delta: Tensor): Unit = { 275 | DenseTensor.applyFunction(input, delta, layer.activationFunction.derivative) 276 | DenseTensor.elementwiseProduct(delta, nextDelta) 277 | } 278 | 279 | override def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit = {} 280 | } 281 | 282 | /** 283 | * Trait for the artificial neural network (ANN) topology properties 284 | */ 285 | private[layers] trait Topology extends Serializable { 286 | def model(weights: Vector): TopologyModel 287 | def model(seed: Long): TopologyModel 288 | } 289 | 290 | /** 291 | * Trait for ANN topology model 292 | */ 293 | private[layers] trait TopologyModel extends Serializable { 294 | 295 | val weights: Vector 296 | /** 297 | * Array of layers 298 | */ 299 | val layers: Array[Layer] 300 | 301 | /** 302 | * Array of layer models 303 | */ 304 | val layerModels: Array[LayerModel] 305 | /** 306 | * Forward propagation 307 | * 308 | * @param data input data 309 | * @return array of outputs for each of the layers 310 | */ 311 | def forward(data: Tensor): Array[Tensor] 312 | 313 | /** 314 | * Prediction of the model 315 | * 316 | * @param data input data 317 | * @return prediction 318 | */ 319 | def predict(data: Vector): Vector 320 | 321 | /** 322 | * Computes gradient for the network 323 | * 324 | * @param data input data 325 | * @param target target output 326 | * @param cumGradient cumulative gradient 327 | * @param blockSize block size 328 | * @return error 329 | */ 330 | def computeGradient(data: Tensor, target: Tensor, cumGradient: Tensor, 331 | blockSize: Int): Double 332 | } 333 | 334 | /** 335 | * Feed forward ANN 336 | * 337 | * @param layers 338 | */ 339 | private[layers] class FeedForwardTopology private(val layers: Array[Layer]) extends Topology { 340 | override def model(weights: Vector): TopologyModel = FeedForwardModel(this, weights) 341 | 342 | override def model(seed: Long): TopologyModel = FeedForwardModel(this, seed) 343 | } 344 | 345 | /** 346 | * Factory for some of the frequently-used topologies 347 | */ 348 | object FeedForwardTopology { 349 | /** 350 | * Creates a feed forward topology from the array of layers 351 | * 352 | * @param layers array of layers 353 | * @return feed forward topology 354 | */ 355 | def apply(layers: Array[Layer]): FeedForwardTopology = { 356 | new FeedForwardTopology(layers) 357 | } 358 | 359 | /** 360 | * Creates a multi-layer perceptron 361 | * 362 | * @param layerSizes sizes of layers including input and output size 363 | * @param softmaxOnTop wether to use SoftMax or Sigmoid function for an output layer. 
364 | * Softmax is default 365 | * @return multilayer perceptron topology 366 | */ 367 | def multiLayerPerceptron( 368 | layerSizes: Array[Int], 369 | softmaxOnTop: Boolean = true): FeedForwardTopology = { 370 | val layers = new Array[Layer]((layerSizes.length - 1) * 2) 371 | for(i <- 0 until layerSizes.length - 1) { 372 | layers(i * 2) = new AffineLayer(layerSizes(i), layerSizes(i + 1)) 373 | layers(i * 2 + 1) = 374 | if (i == layerSizes.length - 2) { 375 | if (softmaxOnTop) { 376 | new SoftmaxLayerWithCrossEntropyLoss() 377 | } else { 378 | // TODO: squared error is more natural but converges slower 379 | new SigmoidLayerWithSquaredError() 380 | } 381 | } else { 382 | new FunctionalLayer(new SigmoidFunction()) 383 | } 384 | } 385 | FeedForwardTopology(layers) 386 | } 387 | } 388 | 389 | /** 390 | * Model of Feed Forward Neural Network. 391 | * Implements forward, gradient computation and can return weights in vector format. 392 | * 393 | * @param weights network weights 394 | * @param topology network topology 395 | */ 396 | class FeedForwardModel private( 397 | val weights: Vector, 398 | val topology: FeedForwardTopology) extends TopologyModel { 399 | val layers = topology.layers 400 | val layerModels = new Array[LayerModel](layers.length) 401 | private var offset = 0 402 | for (i <- 0 until layers.length) { 403 | layerModels(i) = layers(i).model( 404 | DenseTensor(weights.toArray, Array(layers(i).weightSize), offset)) 405 | offset += layers(i).weightSize 406 | } 407 | private var outputs: Array[Tensor] = null 408 | private var deltas: Array[Tensor] = null 409 | 410 | override def forward(data: Tensor): Array[Tensor] = { 411 | // Initialize output arrays for all layers. Special treatment for InPlace 412 | val currentBatchSize = data.shape(1) 413 | // TODO: allocate outputs as one big array and then create BDMs from it 414 | if (outputs == null || outputs(0).shape(1) != currentBatchSize) { 415 | outputs = new Array[Tensor](layers.length) 416 | var inputSize = data.shape(0) 417 | for (i <- 0 until layers.length) { 418 | if (layers(i).inPlace) { 419 | outputs(i) = outputs(i - 1) 420 | } else { 421 | val outputSize = layers(i).outputSize(inputSize) 422 | outputs(i) = DenseTensor(Array(outputSize, currentBatchSize)) 423 | inputSize = outputSize 424 | } 425 | } 426 | } 427 | layerModels(0).eval(data, outputs(0)) 428 | for (i <- 1 until layerModels.length) { 429 | layerModels(i).eval(outputs(i - 1), outputs(i)) 430 | } 431 | outputs 432 | } 433 | 434 | override def computeGradient( 435 | data: Tensor, 436 | target: Tensor, 437 | cumGradient: Tensor, 438 | realBatchSize: Int): Double = { 439 | val outputs = forward(data) 440 | val currentBatchSize = data.shape(1) 441 | // TODO: allocate deltas as one big array and then create BDMs from it 442 | if (deltas == null || deltas(0).shape(1) != currentBatchSize) { 443 | deltas = new Array[Tensor](layerModels.length) 444 | var inputSize = data.shape(0) 445 | for (i <- 0 until layerModels.length - 1) { 446 | val outputSize = layers(i).outputSize(inputSize) 447 | deltas(i) = new Tensor(Array(outputSize, currentBatchSize)) 448 | inputSize = outputSize 449 | } 450 | } 451 | val L = layerModels.length - 1 452 | // TODO: explain why delta of top layer is null (because it might contain loss+layer) 453 | val loss = layerModels.last match { 454 | case levelWithError: LossFunction => levelWithError.loss(outputs.last, target, deltas(L - 1)) 455 | case _ => 456 | throw new UnsupportedOperationException("Top layer is required to have objective.") 457 | } 458 | for 
(i <- (L - 2) to (0, -1)) { 459 | layerModels(i + 1).prevDelta(deltas(i + 1), outputs(i + 1), deltas(i)) 460 | } 461 | val cumGradientArray = cumGradient.data 462 | var offset = 0 463 | for (i <- 0 until layerModels.length) { 464 | val input = if (i == 0) data else outputs(i - 1) 465 | layerModels(i).grad(deltas(i), input, 466 | new Tensor(cumGradientArray, Array(layers(i).weightSize), offset)) 467 | offset += layers(i).weightSize 468 | } 469 | loss 470 | } 471 | 472 | override def predict(data: Vector): Vector = { 473 | val size = data.size 474 | val result = forward(DenseTensor(data.toArray, Array(size, 1))) 475 | // TODO: check that it was OK not to clone in the previous version 476 | Vectors.dense(result.last.data.clone()) 477 | } 478 | } 479 | 480 | /** 481 | * Fabric for feed forward ANN models 482 | */ 483 | private[layers] object FeedForwardModel { 484 | 485 | /** 486 | * Creates a model from a topology and weights 487 | * 488 | * @param topology topology 489 | * @param weights weights 490 | * @return model 491 | */ 492 | def apply(topology: FeedForwardTopology, weights: Vector): FeedForwardModel = { 493 | // TODO: check that weights size is equal to sum of layers sizes 494 | new FeedForwardModel(weights, topology) 495 | } 496 | 497 | /** 498 | * Creates a model given a topology and seed 499 | * 500 | * @param topology topology 501 | * @param seed seed for generating the weights 502 | * @return model 503 | */ 504 | def apply(topology: FeedForwardTopology, seed: Long = 11L): FeedForwardModel = { 505 | val layers = topology.layers 506 | val layerModels = new Array[LayerModel](layers.length) 507 | var totalSize = 0 508 | for (i <- 0 until topology.layers.length) { 509 | totalSize += topology.layers(i).weightSize 510 | } 511 | val weights: Tensor = DenseTensor(Array(totalSize)) 512 | var offset = 0 513 | // TODO: check if we can re-use XORShiftRandom 514 | val random = new Random(seed) 515 | for(i <- 0 until layers.length) { 516 | layerModels(i) = layers(i). 517 | initModel(DenseTensor(weights.data, Array(layers(i).weightSize), offset), random) 518 | offset += layers(i).weightSize 519 | } 520 | new FeedForwardModel(Vectors.dense(weights.data), topology) 521 | } 522 | } 523 | 524 | /** 525 | * Neural network gradient. Does nothing but calling Model's gradient 526 | * 527 | * @param topology topology 528 | * @param dataStacker data stacker 529 | */ 530 | private[layers] class ANNGradient(topology: Topology, dataStacker: DataStacker) extends Gradient { 531 | 532 | override def compute(data: Vector, label: Double, weights: Tensor): (Tensor, Double) = { 533 | val gradient = new Tensor(Array(weights.size)) 534 | val loss = compute(data, label, weights, gradient) 535 | (gradient, loss) 536 | } 537 | 538 | override def compute( 539 | data: Vector, 540 | label: Double, 541 | weights: Tensor, 542 | cumGradient: Tensor): Double = { 543 | val (input, target, realBatchSize) = dataStacker.unstack(data) 544 | val model = topology.model(Vectors.dense(weights.data)) 545 | model.computeGradient(input, target, cumGradient, realBatchSize) 546 | } 547 | } 548 | 549 | /** 550 | * Stacks pairs of training samples (input, output) in one vector allowing them to pass 551 | * through Optimizer/Gradient interfaces. If stackSize is more than one, makes blocks 552 | * or matrices of inputs and outputs and then stack them in one vector. 553 | * This can be used for further batch computations after unstacking. 
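 *
 * For example, with stackSize = 2, inputSize = 3 and outputSize = 1, two samples are
 * packed into one vector of length 2 * 3 + 2 * 1 = 8: both inputs first, then both
 * targets. Unstacking recovers the batch matrices with one sample per column
 * (illustrative sketch):
 * {{{
 *   val stacker = new DataStacker(2, 3, 1)
 *   val (input, target, realBatchSize) =
 *     stacker.unstack(Vectors.dense(Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.0, 1.0)))
 *   // input has shape (3, 2), target has shape (1, 2), realBatchSize == 2
 * }}}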
554 | * 555 | * @param stackSize stack size 556 | * @param inputSize size of the input vectors 557 | * @param outputSize size of the output vectors 558 | */ 559 | private[layers] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int) 560 | extends Serializable { 561 | 562 | /** 563 | * Stacks the data 564 | * 565 | * @param data RDD of vector pairs 566 | * @return RDD of double (always zero) and vector that contains the stacked vectors 567 | */ 568 | def stack(data: RDD[(Vector, Vector)]): RDD[(Double, Vector)] = { 569 | val stackedData = if (stackSize == 1) { 570 | data.map { v => 571 | val bigVector = new Array[Double](v._1.size + v._2.size) 572 | System.arraycopy(v._1.toArray, 0, bigVector, 0, v._1.size) 573 | System.arraycopy(v._2.toArray, 0, bigVector, v._1.size, v._2.size) 574 | (0.0, Vectors.dense(bigVector)) 575 | } 576 | } else { 577 | data.mapPartitions { it => 578 | it.grouped(stackSize).map { seq => 579 | val size = seq.size 580 | val bigVector = new Array[Double](inputSize * size + outputSize * size) 581 | var i = 0 582 | seq.foreach { case (in, out) => 583 | System.arraycopy(in.toArray, 0, bigVector, i * inputSize, inputSize) 584 | System.arraycopy(out.toArray, 0, bigVector, 585 | inputSize * size + i * outputSize, outputSize) 586 | i += 1 587 | } 588 | (0.0, Vectors.dense(bigVector)) 589 | } 590 | } 591 | } 592 | stackedData 593 | } 594 | 595 | /** 596 | * Unstack the stacked vectors into matrices for batch operations 597 | * 598 | * @param data stacked vector 599 | * @return pair of matrices holding input and output data and the real stack size 600 | */ 601 | def unstack(data: Vector): (Tensor, Tensor, Int) = { 602 | val arrData = data.toArray 603 | val realStackSize = arrData.length / (inputSize + outputSize) 604 | val input = DenseTensor(arrData, Array(inputSize, realStackSize)) 605 | val target = DenseTensor(arrData, Array(outputSize, realStackSize), inputSize * realStackSize) 606 | (input, target, realStackSize) 607 | } 608 | } 609 | 610 | /** 611 | * Simple updater 612 | */ 613 | private[layers] class ANNUpdater extends Updater { 614 | 615 | override def compute( 616 | weightsOld: Tensor, 617 | gradient: Tensor, 618 | stepSize: Double, 619 | iter: Int, 620 | regParam: Double): (Tensor, Double) = { 621 | val thisIterStepSize = stepSize 622 | DenseTensor.axpy(-thisIterStepSize, gradient, weightsOld) 623 | (weightsOld, 0) 624 | } 625 | } 626 | 627 | /** 628 | * MLlib-style trainer class that trains a network given the data and topology 629 | * 630 | * @param topology topology of ANN 631 | * @param inputSize input size 632 | * @param outputSize output size 633 | */ 634 | class FeedForwardTrainer( 635 | topology: Topology, 636 | val inputSize: Int, 637 | val outputSize: Int) extends Serializable { 638 | 639 | private var _seed = 11L 640 | private var _weights: Vector = null 641 | private var _stackSize = 128 642 | private var dataStacker = new DataStacker(_stackSize, inputSize, outputSize) 643 | private var _gradient: Gradient = new ANNGradient(topology, dataStacker) 644 | private var _updater: Updater = new ANNUpdater() 645 | private var optimizer: Optimizer = LBFGSOptimizer.setConvergenceTol(1e-4).setNumIterations(100) 646 | 647 | /** 648 | * Returns seed 649 | * 650 | * @return seed 651 | */ 652 | def getSeed: Long = _seed 653 | 654 | /** 655 | * Sets seed 656 | * 657 | * @param value seed 658 | * @return trainer 659 | */ 660 | def setSeed(value: Long): FeedForwardTrainer = { 661 | _seed = value 662 | this 663 | } 664 | 665 | /** 666 | * Returns weights 667 | 
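 * (null unless weights have been set explicitly with setWeights)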
* 668 | * @return weights 669 | */ 670 | def getWeights: Vector = _weights 671 | 672 | /** 673 | * Sets weights 674 | * 675 | * @param value weights 676 | * @return trainer 677 | */ 678 | def setWeights(value: Vector): FeedForwardTrainer = { 679 | _weights = value 680 | this 681 | } 682 | 683 | /** 684 | * Sets the stack size 685 | * 686 | * @param value stack size 687 | * @return trainer 688 | */ 689 | def setStackSize(value: Int): FeedForwardTrainer = { 690 | _stackSize = value 691 | dataStacker = new DataStacker(value, inputSize, outputSize) 692 | this 693 | } 694 | 695 | /** 696 | * Sets the SGD optimizer 697 | * 698 | * @return SGD optimizer 699 | */ 700 | def SGDOptimizer: GradientDescent = { 701 | val sgd = new GradientDescent(_gradient, _updater) 702 | optimizer = sgd 703 | sgd 704 | } 705 | 706 | /** 707 | * Sets the LBFGS optimizer 708 | * 709 | * @return LBGS optimizer 710 | */ 711 | def LBFGSOptimizer: LBFGS = { 712 | val lbfgs = new LBFGS(_gradient, _updater) 713 | optimizer = lbfgs 714 | lbfgs 715 | } 716 | 717 | /** 718 | * Sets the updater 719 | * 720 | * @param value updater 721 | * @return trainer 722 | */ 723 | def setUpdater(value: Updater): FeedForwardTrainer = { 724 | _updater = value 725 | updateUpdater(value) 726 | this 727 | } 728 | 729 | /** 730 | * Sets the gradient 731 | * 732 | * @param value gradient 733 | * @return trainer 734 | */ 735 | def setGradient(value: Gradient): FeedForwardTrainer = { 736 | _gradient = value 737 | updateGradient(value) 738 | this 739 | } 740 | 741 | private[this] def updateGradient(gradient: Gradient): Unit = { 742 | optimizer match { 743 | case lbfgs: LBFGS => lbfgs.setGradient(gradient) 744 | case sgd: GradientDescent => sgd.setGradient(gradient) 745 | case other => throw new UnsupportedOperationException( 746 | s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") 747 | } 748 | } 749 | 750 | private[this] def updateUpdater(updater: Updater): Unit = { 751 | optimizer match { 752 | case lbfgs: LBFGS => lbfgs.setUpdater(updater) 753 | case sgd: GradientDescent => sgd.setUpdater(updater) 754 | case other => throw new UnsupportedOperationException( 755 | s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") 756 | } 757 | } 758 | 759 | /** 760 | * Trains the ANN 761 | * 762 | * @param data RDD of input and output vector pairs 763 | * @return model 764 | */ 765 | def train(data: RDD[(Vector, Vector)]): TopologyModel = { 766 | val w = if (getWeights == null) { 767 | // TODO: will make a copy if vector is a subvector of BDV (see Vectors code) 768 | topology.model(_seed).weights 769 | } else { 770 | getWeights 771 | } 772 | // TODO: deprecate standard optimizer because it needs Vector 773 | val newWeights = optimizer.optimize(dataStacker.stack(data), 774 | new Tensor(w.toArray, Array(w.size), 0)) 775 | topology.model(Vectors.dense(newWeights.data)) 776 | } 777 | } 778 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/layers/LossFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.layers 19 | 20 | import java.util.Random 21 | 22 | import scaladl.layers.AnnTypes._ 23 | import scaladl.tensor.DenseTensor 24 | 25 | /** 26 | * Trait for loss function 27 | */ 28 | private[layers] trait LossFunction { 29 | /** 30 | * Loss function 31 | * 32 | * @param output actual output 33 | * @param target target output 34 | * @param delta output delta to write to 35 | * @return 36 | */ 37 | def loss(output: Tensor, target: Tensor, delta: Tensor): Double 38 | } 39 | 40 | class SigmoidLayerWithSquaredError extends Layer { 41 | override val weightSize = 0 42 | override def outputSize(inputSize: Int): Int = inputSize 43 | override val inPlace = true 44 | override def model(weights: Tensor): LayerModel = new SigmoidLayerModelWithSquaredError() 45 | override def initModel(weights: Tensor, random: Random): LayerModel = 46 | new SigmoidLayerModelWithSquaredError() 47 | } 48 | 49 | private[layers] class SigmoidLayerModelWithSquaredError 50 | extends FunctionalLayerModel(new FunctionalLayer(new SigmoidFunction)) with LossFunction { 51 | override def loss(output: Tensor, target: Tensor, delta: Tensor): Double = { 52 | DenseTensor.applyFunction(output, target, delta, (o: Double, t: Double) => o - t) 53 | val error = (delta :* delta).sum / 2 / output.shape(1) 54 | DenseTensor.applyFunction(delta, output, delta, (x: Double, o: Double) => x * (o - o * o)) 55 | error 56 | } 57 | } 58 | 59 | class SoftmaxLayerWithCrossEntropyLoss extends Layer { 60 | override val weightSize = 0 61 | override def outputSize(inputSize: Int): Int = inputSize 62 | override val inPlace = true 63 | override def model(weights: Tensor): LayerModel = 64 | new SoftmaxLayerModelWithCrossEntropyLoss() 65 | override def initModel(weights: Tensor, random: Random): LayerModel = 66 | new SoftmaxLayerModelWithCrossEntropyLoss() 67 | } 68 | 69 | private[layers] class SoftmaxLayerModelWithCrossEntropyLoss extends LayerModel with LossFunction { 70 | 71 | private val epsilon = 1e-15 72 | private var epsilonMatrix: Tensor = null 73 | 74 | val weights: Tensor = DenseTensor(Array(0)) 75 | 76 | def inplaceEval(x: Tensor, y: Tensor): Unit = { 77 | require(x.shape.length == 2 && y.shape.length == 2 78 | && x.shape(0) == y.shape(0) && x.shape(1) == y.shape(1), 79 | "X and Y must be 2 dim and of equal size") 80 | var j = 0 81 | // find max value to make sure later that exponent is computable 82 | while (j < x.shape(1)) { 83 | var i = 0 84 | var max = Double.MinValue 85 | while (i < x.shape(0)) { 86 | if (x.value(Array(i, j)) > max) { 87 | max = x.value(Array(i, j)) 88 | } 89 | i += 1 90 | } 91 | var sum = 0.0 92 | i = 0 93 | while (i < x.shape(0)) { 94 | val res = Math.exp(x.value(Array(i, j)) - max) 95 | y.update(Array(i, j), res) 96 | sum += res 97 | i += 1 98 | } 99 | i = 0 100 | while (i < x.shape(0)) { 101 | val avg = y.value(Array(i, j)) / sum 102 | y.update(Array(i, j), avg) 103 | i += 1 104 | } 105 | j += 1 106 | } 107 | } 108 | 109 | override def eval(data: Tensor, output: Tensor): Unit = { 110 | inplaceEval(data, output) 111 | } 112 | override def prevDelta(nextDelta: 
Tensor, input: Tensor, delta: Tensor): Unit = {} 113 | 114 | override def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit = {} 115 | 116 | override def loss(output: Tensor, target: Tensor, delta: Tensor): Double = { 117 | if (epsilonMatrix == null || epsilonMatrix.shape(1) != target.shape(1)) { 118 | epsilonMatrix = DenseTensor.fill(target.shape)(epsilon) 119 | } 120 | DenseTensor.applyFunction(output, target, delta, (o: Double, t: Double) => o - t) 121 | val temp = output + epsilonMatrix 122 | DenseTensor.applyFunction(temp, Math.log) 123 | -(target :* temp).sum / output.shape(1) 124 | } 125 | } 126 | 127 | class EmptyLayerWithSquaredError extends Layer { 128 | override val weightSize = 0 129 | override def outputSize(inputSize: Int): Int = inputSize 130 | override val inPlace = true 131 | override def model(weights: Tensor): LayerModel = 132 | new EmptyLayerModelWithSquaredError() 133 | override def initModel(weights: Tensor, random: Random): LayerModel = 134 | new EmptyLayerModelWithSquaredError() 135 | } 136 | 137 | private[layers] class EmptyLayerModelWithSquaredError extends LayerModel with LossFunction { 138 | 139 | val weights: Tensor = DenseTensor(Array(0)) 140 | 141 | override def loss(output: Tensor, target: Tensor, delta: Tensor): Double = { 142 | DenseTensor.applyFunction(output, target, delta, (o: Double, t: Double) => o - t) 143 | (delta :* delta).sum / 2 / output.shape(1) 144 | } 145 | 146 | override def eval(data: Tensor, output: Tensor): Unit = {} 147 | override def prevDelta(nextDelta: Tensor, input: Tensor, delta: Tensor): Unit = {} 148 | override def grad(delta: Tensor, input: Tensor, cumGrad: Tensor): Unit = {} 149 | } 150 | 151 | class SigmoidLayerWithCrossEntropyLoss extends Layer { 152 | override val weightSize = 0 153 | override def outputSize(inputSize: Int): Int = inputSize 154 | override val inPlace = true 155 | override def model(weights: Tensor): LayerModel = 156 | new SigmoidLayerModelWithCrossEntropyLoss() 157 | override def initModel(weights: Tensor, random: Random): LayerModel = 158 | new SigmoidLayerModelWithCrossEntropyLoss() 159 | } 160 | 161 | private[layers] class SigmoidLayerModelWithCrossEntropyLoss 162 | extends FunctionalLayerModel(new FunctionalLayer(new SigmoidFunction)) with LossFunction { 163 | // TODO: make a common place where ones matrices reside 164 | private var oneMatrix: Tensor = null 165 | private val epsilon = 1e-15 166 | private var epsilonMatrix: Tensor = null 167 | 168 | override def loss(output: Tensor, target: Tensor, delta: Tensor): Double = { 169 | if (oneMatrix == null || oneMatrix.shape(1) != target.shape(1)) { 170 | oneMatrix = DenseTensor.fill(target.shape)(1) 171 | } 172 | if (epsilonMatrix == null || epsilonMatrix.shape(1) != target.shape(1)) { 173 | epsilonMatrix = DenseTensor.fill(target.shape)(epsilon) 174 | } 175 | DenseTensor.applyFunction(output, target, delta, (o: Double, t: Double) => o - t) 176 | // NB: operation :* don't have execution priority over summation 177 | // TODO: is adding epsilon a good way to fight log(o) ? 
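    // What follows evaluates the binary cross-entropy
    //   loss = -sum(t * log(o + eps) + (1 - t) * log(1 - o + eps)) / batchSize,
    // where eps keeps log() away from log(0) when the sigmoid output saturates.
    // delta was set above to (o - t), which is the gradient of this loss with respect
    // to the pre-activation once the sigmoid layer is taken into account.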
178 | val temp1 = output + epsilonMatrix; 179 | DenseTensor.applyFunction(temp1, Math.log) 180 | val temp2 = oneMatrix - output + epsilonMatrix 181 | DenseTensor.applyFunction(temp2, Math.log) 182 | -((target :* temp1) + ((oneMatrix - target) :* temp2)).sum / output.shape(1) 183 | } 184 | } 185 | 186 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/Gradient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import org.apache.spark.ml.linalg.Vector 21 | 22 | import scaladl.layers.AnnTypes.Tensor 23 | 24 | /** 25 | * :: DeveloperApi :: 26 | * Class used to compute the gradient for a loss function, given a single data point. 27 | */ 28 | abstract class Gradient extends Serializable { 29 | /** 30 | * Compute the gradient and loss given the features of a single data point. 31 | * 32 | * @param data features for one data point 33 | * @param label label for this data point 34 | * @param weights weights/coefficients corresponding to features 35 | * @return (gradient: Vector, loss: Double) 36 | */ 37 | def compute(data: Vector, label: Double, weights: Tensor): (Tensor, Double) = { 38 | val gradient = new Tensor(Array(weights.size)) 39 | val loss = compute(data, label, weights, gradient) 40 | (gradient, loss) 41 | } 42 | 43 | /** 44 | * Compute the gradient and loss given the features of a single data point, 45 | * add the gradient to a provided vector to avoid creating new objects, and return loss. 46 | * 47 | * @param data features for one data point 48 | * @param label label for this data point 49 | * @param weights weights/coefficients corresponding to features 50 | * @param cumGradient the computed gradient will be added to this vector 51 | * @return loss 52 | */ 53 | def compute(data: Vector, label: Double, weights: Tensor, cumGradient: Tensor): Double 54 | } 55 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/GradientDescent.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import scala.collection.mutable.ArrayBuffer 21 | 22 | import org.apache.log4j.{Level, LogManager} 23 | import org.apache.spark.ml.linalg.Vector 24 | import org.apache.spark.rdd.RDD 25 | 26 | import scaladl.layers.AnnTypes.Tensor 27 | import scaladl.tensor.DenseTensor 28 | 29 | /** 30 | * Class used to solve an optimization problem using Gradient Descent. 31 | * 32 | * @param gradient Gradient function to be used. 33 | * @param updater Updater to be used to update weights after every iteration. 34 | */ 35 | class GradientDescent (private var gradient: Gradient, private var updater: Updater) 36 | extends Optimizer { 37 | 38 | private var stepSize: Double = 1.0 39 | private var numIterations: Int = 100 40 | private var regParam: Double = 0.0 41 | private var miniBatchFraction: Double = 1.0 42 | private var convergenceTol: Double = 0.001 43 | 44 | /** 45 | * Set the initial step size of SGD for the first step. Default 1.0. 46 | * In subsequent steps, the step size will decrease with stepSize/sqrt(t) 47 | */ 48 | def setStepSize(step: Double): this.type = { 49 | this.stepSize = step 50 | this 51 | } 52 | 53 | /** 54 | * Set fraction of data to be used for each SGD iteration. 55 | * Default 1.0 (corresponding to deterministic/classical gradient descent) 56 | */ 57 | def setMiniBatchFraction(fraction: Double): this.type = { 58 | this.miniBatchFraction = fraction 59 | this 60 | } 61 | 62 | /** 63 | * Set the number of iterations for SGD. Default 100. 64 | */ 65 | def setNumIterations(iters: Int): this.type = { 66 | this.numIterations = iters 67 | this 68 | } 69 | 70 | /** 71 | * Set the regularization parameter. Default 0.0. 72 | */ 73 | def setRegParam(regParam: Double): this.type = { 74 | this.regParam = regParam 75 | this 76 | } 77 | 78 | /** 79 | * Set the convergence tolerance. Default 0.001 80 | * convergenceTol is a condition which decides iteration termination. 81 | * The end of iteration is decided based on below logic. 82 | * 83 | * - If the norm of the new solution vector is >1, the diff of solution vectors 84 | * is compared to relative tolerance which means normalizing by the norm of 85 | * the new solution vector. 86 | * - If the norm of the new solution vector is <=1, the diff of solution vectors 87 | * is compared to absolute tolerance which is not normalizing. 88 | * 89 | * Must be between 0.0 and 1.0 inclusively. 90 | */ 91 | def setConvergenceTol(tolerance: Double): this.type = { 92 | require(0.0 <= tolerance && tolerance <= 1.0) 93 | this.convergenceTol = tolerance 94 | this 95 | } 96 | 97 | /** 98 | * Set the gradient function (of the loss function of one single data example) 99 | * to be used for SGD. 100 | */ 101 | def setGradient(gradient: Gradient): this.type = { 102 | this.gradient = gradient 103 | this 104 | } 105 | 106 | 107 | /** 108 | * Set the updater function to actually perform a gradient step in a given direction. 
109 | * The updater is responsible to perform the update from the regularization term as well, 110 | * and therefore determines what kind or regularization is used, if any. 111 | */ 112 | def setUpdater(updater: Updater): this.type = { 113 | this.updater = updater 114 | this 115 | } 116 | 117 | /** 118 | * Runs gradient descent on the given training data. 119 | * 120 | * @param data training data 121 | * @param initialWeights initial weights 122 | * @return solution vector 123 | */ 124 | def optimize(data: RDD[(Double, Vector)], initialWeights: Tensor): Tensor = { 125 | val (weights, _) = GradientDescent.runMiniBatchSGD( 126 | data, 127 | gradient, 128 | updater, 129 | stepSize, 130 | numIterations, 131 | regParam, 132 | miniBatchFraction, 133 | initialWeights, 134 | convergenceTol) 135 | weights 136 | } 137 | 138 | } 139 | 140 | /** 141 | * :: DeveloperApi :: 142 | * Top-level method to run gradient descent. 143 | */ 144 | object GradientDescent { 145 | /** 146 | * Run stochastic gradient descent (SGD) in parallel using mini batches. 147 | * In each iteration, we sample a subset (fraction miniBatchFraction) of the total data 148 | * in order to compute a gradient estimate. 149 | * Sampling, and averaging the subgradients over this subset is performed using one standard 150 | * spark map-reduce in each iteration. 151 | * 152 | * @param data Input data for SGD. RDD of the set of data examples, each of 153 | * the form (label, [feature values]). 154 | * @param gradient Gradient object (used to compute the gradient of the loss function of 155 | * one single data example) 156 | * @param updater Updater function to actually perform a gradient step in a given direction. 157 | * @param stepSize initial step size for the first step 158 | * @param numIterations number of iterations that SGD should be run. 159 | * @param regParam regularization parameter 160 | * @param miniBatchFraction fraction of the input data set that should be used for 161 | * one iteration of SGD. Default value 1.0. 162 | * @param convergenceTol Minibatch iteration will end before numIterations if the relative 163 | * difference between the current weight and the previous weight is less 164 | * than this value. In measuring convergence, L2 norm is calculated. 165 | * Default value 0.001. Must be between 0.0 and 1.0 inclusively. 166 | * @return A tuple containing two elements. The first element is a column matrix containing 167 | * weights for every feature, and the second element is an array containing the 168 | * stochastic loss computed for every iteration. 
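 *
 * Typical usage goes through the class-based API rather than calling this method
 * directly. An illustrative sketch (annGradient, annUpdater, stackedData and
 * initialWeights are assumed to have been prepared elsewhere, e.g. by
 * FeedForwardTrainer):
 * {{{
 *   val sgd = new GradientDescent(annGradient, annUpdater)
 *     .setStepSize(0.03)
 *     .setNumIterations(200)
 *     .setMiniBatchFraction(1.0)
 *     .setConvergenceTol(1e-3)
 *   val optimizedWeights = sgd.optimize(stackedData, initialWeights)
 * }}}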
169 | */ 170 | def runMiniBatchSGD( 171 | data: RDD[(Double, Vector)], 172 | gradient: Gradient, 173 | updater: Updater, 174 | stepSize: Double, 175 | numIterations: Int, 176 | regParam: Double, 177 | miniBatchFraction: Double, 178 | initialWeights: Tensor, 179 | convergenceTol: Double): (Tensor, Array[Double]) = { 180 | val log = LogManager.getRootLogger 181 | 182 | def logWarning(msg: => String) { 183 | if (log.isEnabledFor(Level.WARN)) log.warn(msg) 184 | } 185 | def logInfo(msg: => String) { 186 | if (log.isEnabledFor(Level.INFO)) log.info(msg) 187 | } 188 | 189 | 190 | // convergenceTol should be set with non minibatch settings 191 | if (miniBatchFraction < 1.0 && convergenceTol > 0.0) { 192 | logWarning("Testing against a convergenceTol when using miniBatchFraction " + 193 | "< 1.0 can be unstable because of the stochasticity in sampling.") 194 | } 195 | 196 | val stochasticLossHistory = new ArrayBuffer[Double](numIterations) 197 | // Record previous weight and current one to calculate solution vector difference 198 | 199 | var previousWeights: Option[Tensor] = None 200 | var currentWeights: Option[Tensor] = None 201 | 202 | val numExamples = data.count() 203 | 204 | // if no data, return initial weights to avoid NaNs 205 | if (numExamples == 0) { 206 | logWarning("GradientDescent.runMiniBatchSGD returning initial weights, no data found") 207 | return (initialWeights, stochasticLossHistory.toArray) 208 | } 209 | 210 | if (numExamples * miniBatchFraction < 1) { 211 | logWarning("The miniBatchFraction is too small") 212 | } 213 | 214 | // Initialize weights as a column vector 215 | var weights = initialWeights 216 | val n = weights.size 217 | 218 | /** 219 | * For the first iteration, the regVal will be initialized as sum of weight squares 220 | * if it's L2 updater; for L1 updater, the same logic is followed. 221 | */ 222 | var regVal = updater.compute( 223 | weights, new Tensor(Array(weights.size)), 0, 1, regParam)._2 224 | 225 | var converged = false // indicates whether converged based on convergenceTol 226 | var i = 1 227 | while (!converged && i <= numIterations) { 228 | val bcWeights = data.context.broadcast(weights) 229 | // Sample a subset (fraction miniBatchFraction) of the total data 230 | // compute and sum up the subgradients on this subset (this is one map-reduce) 231 | val (gradientSum, lossSum, miniBatchSize) = data.sample(false, miniBatchFraction, 42 + i) 232 | .treeAggregate((new Tensor(Array(n)), 0.0, 0L))( 233 | seqOp = (c, v) => { 234 | // c: (grad, loss, count), v: (label, features) 235 | val l = gradient.compute(v._2, v._1, bcWeights.value, c._1) 236 | (c._1, c._2 + l, c._3 + 1) 237 | }, 238 | combOp = (c1, c2) => { 239 | // c: (grad, loss, count) 240 | DenseTensor.axpy(1, c2._1, c1._1) 241 | (c1._1, c1._2 + c2._2, c1._3 + c2._3) 242 | }) 243 | 244 | if (miniBatchSize > 0) { 245 | /** 246 | * lossSum is computed using the weights from the previous iteration 247 | * and regVal is the regularization value computed in the previous iteration as well. 
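         * The value appended to the history below therefore describes the weights as
         * they were before this iteration's update.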
248 | */ 249 | stochasticLossHistory.append(lossSum / miniBatchSize + regVal) 250 | DenseTensor.scal(miniBatchSize.toDouble, gradientSum) 251 | val update = updater.compute( 252 | weights, gradientSum, 253 | stepSize, i, regParam) 254 | weights = update._1 255 | regVal = update._2 256 | 257 | previousWeights = currentWeights 258 | currentWeights = Some(weights) 259 | if (previousWeights != None && currentWeights != None) { 260 | converged = isConverged(previousWeights.get, 261 | currentWeights.get, convergenceTol) 262 | } 263 | } else { 264 | logWarning(s"Iteration ($i/$numIterations). The size of sampled batch is zero") 265 | } 266 | i += 1 267 | } 268 | 269 | logInfo("GradientDescent.runMiniBatchSGD finished. Last 10 stochastic losses %s".format( 270 | stochasticLossHistory.takeRight(10).mkString(", "))) 271 | 272 | (weights, stochasticLossHistory.toArray) 273 | 274 | } 275 | 276 | 277 | /** 278 | * Alias of [[runMiniBatchSGD]] with convergenceTol set to default value of 0.001. 279 | */ 280 | def runMiniBatchSGD( 281 | data: RDD[(Double, Vector)], 282 | gradient: Gradient, 283 | updater: Updater, 284 | stepSize: Double, 285 | numIterations: Int, 286 | regParam: Double, 287 | miniBatchFraction: Double, 288 | initialWeights: Tensor): (Tensor, Array[Double]) = 289 | GradientDescent.runMiniBatchSGD(data, gradient, updater, stepSize, numIterations, 290 | regParam, miniBatchFraction, initialWeights, 0.001) 291 | 292 | 293 | private def isConverged( 294 | previousWeights: Tensor, 295 | currentWeights: Tensor, 296 | convergenceTol: Double): Boolean = { 297 | // This represents the difference of updated weights in the iteration. 298 | val solutionVecDiff: Double = (previousWeights - currentWeights).norm 299 | 300 | solutionVecDiff < convergenceTol * Math.max(currentWeights.norm, 1.0) 301 | } 302 | 303 | } 304 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/LBFGS.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import scala.collection.mutable 21 | 22 | import breeze.linalg.{DenseVector => BDV} 23 | import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS} 24 | import org.apache.log4j.{Level, LogManager} 25 | import org.apache.spark.ml.linalg.Vector 26 | import org.apache.spark.rdd.RDD 27 | 28 | import scaladl.layers.AnnTypes.Tensor 29 | import scaladl.tensor.DenseTensor 30 | 31 | 32 | class LBFGS(private var gradient: Gradient, private var updater: Updater) 33 | extends Optimizer { 34 | 35 | private var numCorrections = 10 36 | private var convergenceTol = 1E-6 37 | private var maxNumIterations = 100 38 | private var regParam = 0.0 39 | 40 | /** 41 | * Set the number of corrections used in the LBFGS update. Default 10. 42 | * Values of numCorrections less than 3 are not recommended; large values 43 | * of numCorrections will result in excessive computing time. 44 | * 3 < numCorrections < 10 is recommended. 45 | * Restriction: numCorrections > 0 46 | */ 47 | def setNumCorrections(corrections: Int): this.type = { 48 | assert(corrections > 0) 49 | this.numCorrections = corrections 50 | this 51 | } 52 | 53 | /** 54 | * Set the convergence tolerance of iterations for L-BFGS. Default 1E-6. 55 | * Smaller value will lead to higher accuracy with the cost of more iterations. 56 | * This value must be nonnegative. Lower convergence values are less tolerant 57 | * and therefore generally cause more iterations to be run. 58 | */ 59 | def setConvergenceTol(tolerance: Double): this.type = { 60 | this.convergenceTol = tolerance 61 | this 62 | } 63 | 64 | /* 65 | * Get the convergence tolerance of iterations. 66 | */ 67 | def getConvergenceTol(): Double = { 68 | this.convergenceTol 69 | } 70 | 71 | /** 72 | * Set the maximal number of iterations for L-BFGS. Default 100. 73 | * 74 | * @deprecated use [[LBFGS#setNumIterations]] instead 75 | */ 76 | @deprecated("use setNumIterations instead", "1.1.0") 77 | def setMaxNumIterations(iters: Int): this.type = { 78 | this.setNumIterations(iters) 79 | } 80 | 81 | /** 82 | * Set the maximal number of iterations for L-BFGS. Default 100. 83 | */ 84 | def setNumIterations(iters: Int): this.type = { 85 | this.maxNumIterations = iters 86 | this 87 | } 88 | 89 | /** 90 | * Get the maximum number of iterations for L-BFGS. Defaults to 100. 91 | */ 92 | def getNumIterations(): Int = { 93 | this.maxNumIterations 94 | } 95 | 96 | /** 97 | * Set the regularization parameter. Default 0.0. 98 | */ 99 | def setRegParam(regParam: Double): this.type = { 100 | this.regParam = regParam 101 | this 102 | } 103 | 104 | /** 105 | * Get the regularization parameter. 106 | */ 107 | def getRegParam(): Double = { 108 | this.regParam 109 | } 110 | 111 | /** 112 | * Set the gradient function (of the loss function of one single data example) 113 | * to be used for L-BFGS. 114 | */ 115 | def setGradient(gradient: Gradient): this.type = { 116 | this.gradient = gradient 117 | this 118 | } 119 | 120 | /** 121 | * Set the updater function to actually perform a gradient step in a given direction. 122 | * The updater is responsible to perform the update from the regularization term as well, 123 | * and therefore determines what kind or regularization is used, if any. 124 | */ 125 | def setUpdater(updater: Updater): this.type = { 126 | this.updater = updater 127 | this 128 | } 129 | 130 | /** 131 | * Returns the updater, limited to internal use. 
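 * (optimize() forwards the same updater to runLBFGS, where CostFun uses it to obtain
 * the regularization value and gradient)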
132 | */ 133 | private def getUpdater(): Updater = { 134 | updater 135 | } 136 | 137 | override def optimize(data: RDD[(Double, Vector)], initialWeights: Tensor): Tensor = { 138 | val (weights, _) = LBFGS.runLBFGS( 139 | data, 140 | gradient, 141 | updater, 142 | numCorrections, 143 | convergenceTol, 144 | maxNumIterations, 145 | regParam, 146 | initialWeights) 147 | weights 148 | } 149 | 150 | } 151 | 152 | object LBFGS { 153 | /** 154 | * Run Limited-memory BFGS (L-BFGS) in parallel. 155 | * Averaging the subgradients over different partitions is performed using one standard 156 | * spark map-reduce in each iteration. 157 | * 158 | * @param data - Input data for L-BFGS. RDD of the set of data examples, each of 159 | * the form (label, [feature values]). 160 | * @param gradient - Gradient object (used to compute the gradient of the loss function of 161 | * one single data example) 162 | * @param updater - Updater function to actually perform a gradient step in a given direction. 163 | * @param numCorrections - The number of corrections used in the L-BFGS update. 164 | * @param convergenceTol - The convergence tolerance of iterations for L-BFGS which is must be 165 | * nonnegative. Lower values are less tolerant and therefore generally 166 | * cause more iterations to be run. 167 | * @param maxNumIterations - Maximal number of iterations that L-BFGS can be run. 168 | * @param regParam - Regularization parameter 169 | * @return A tuple containing two elements. The first element is a column matrix containing 170 | * weights for every feature, and the second element is an array containing the loss 171 | * computed for every iteration. 172 | */ 173 | def runLBFGS( 174 | data: RDD[(Double, Vector)], 175 | gradient: Gradient, 176 | updater: Updater, 177 | numCorrections: Int, 178 | convergenceTol: Double, 179 | maxNumIterations: Int, 180 | regParam: Double, 181 | initialWeights: Tensor): (Tensor, Array[Double]) = { 182 | 183 | val log = LogManager.getRootLogger 184 | 185 | def logWarning(msg: => String) { 186 | if (log.isEnabledFor(Level.WARN)) log.warn(msg) 187 | } 188 | def logInfo(msg: => String) { 189 | if (log.isEnabledFor(Level.INFO)) log.info(msg) 190 | } 191 | 192 | val lossHistory = mutable.ArrayBuilder.make[Double] 193 | 194 | val numExamples = data.count() 195 | 196 | val costFun = 197 | new CostFun(data, gradient, updater, regParam, numExamples) 198 | 199 | val lbfgs = new BreezeLBFGS[BDV[Double]](maxNumIterations, numCorrections, convergenceTol) 200 | 201 | val initialWeightsBrz = BDV[Double](initialWeights.data) 202 | 203 | val states = 204 | lbfgs.iterations(new CachedDiffFunction(costFun), initialWeightsBrz) 205 | 206 | /** 207 | * NOTE: lossSum and loss is computed using the weights from the previous iteration 208 | * and regVal is the regularization value computed in the previous iteration as well. 209 | */ 210 | var state = states.next() 211 | while (states.hasNext) { 212 | lossHistory += state.value 213 | state = states.next() 214 | } 215 | lossHistory += state.value 216 | val weights = new Tensor(state.x.data, Array(state.x.data.length), 0) 217 | 218 | val lossHistoryArray = lossHistory.result() 219 | 220 | logInfo("LBFGS.runLBFGS finished. Last 10 losses %s".format( 221 | lossHistoryArray.takeRight(10).mkString(", "))) 222 | 223 | (weights, lossHistoryArray) 224 | } 225 | 226 | /** 227 | * CostFun implements Breeze's DiffFunction[T], which returns the loss and gradient 228 | * at a particular point (weights). It's used in Breeze's convex optimization routines. 
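 *
 * calculate() broadcasts the current weights, sums the per-example losses and gradients
 * with treeAggregate, and then adds the regularization value and gradient obtained from
 * the Updater, so the value handed back to Breeze is lossSum / numExamples + regVal.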
229 | */ 230 | private class CostFun( 231 | data: RDD[(Double, Vector)], 232 | gradient: Gradient, 233 | updater: Updater, 234 | regParam: Double, 235 | numExamples: Long) extends DiffFunction[BDV[Double]] { 236 | 237 | override def calculate(weights: BDV[Double]): (Double, BDV[Double]) = { 238 | // Have a local copy to avoid the serialization of CostFun object which is not serializable. 239 | val w = new Tensor(weights.data, Array(weights.data.length), 0) 240 | val n = w.size 241 | val bcW = data.context.broadcast(w) 242 | val localGradient = gradient 243 | 244 | val (gradientSum, lossSum) = data.treeAggregate((new Tensor(Array(n)), 0.0))( 245 | seqOp = (c, v) => (c, v) match { case ((grad, loss), (label, features)) => 246 | val l = localGradient.compute( 247 | features, label, bcW.value, grad) 248 | (grad, loss + l) 249 | }, 250 | combOp = (c1, c2) => (c1, c2) match { case ((grad1, loss1), (grad2, loss2)) => 251 | DenseTensor.axpy(1, grad2, grad1) 252 | (grad1, loss1 + loss2) 253 | }) 254 | 255 | /** 256 | * regVal is sum of weight squares if it's L2 updater; 257 | * for other updater, the same logic is followed. 258 | */ 259 | val regVal = updater.compute(w, new Tensor(Array(n)), 0, 1, regParam)._2 260 | 261 | val loss = lossSum / numExamples + regVal 262 | /** 263 | * It will return the gradient part of regularization using updater. 264 | * 265 | * Given the input parameters, the updater basically does the following, 266 | * 267 | * w' = w - thisIterStepSize * (gradient + regGradient(w)) 268 | * Note that regGradient is function of w 269 | * 270 | * If we set gradient = 0, thisIterStepSize = 1, then 271 | * 272 | * regGradient(w) = w - w' 273 | * 274 | * TODO: We need to clean it up by separating the logic of regularization out 275 | * from updater to regularizer. 276 | */ 277 | // The following gradientTotal is actually the regularization part of gradient. 278 | // Will add the gradientSum computed from the data with weights in the next step. 279 | val gradientTotal = w.copy() 280 | DenseTensor.axpy(-1.0, 281 | updater.compute(w, new Tensor(Array(n)), 1, 1, regParam)._1, gradientTotal) 282 | 283 | // gradientTotal = gradientSum / numExamples + gradientTotal 284 | DenseTensor.axpy(1.0 / numExamples, gradientSum, gradientTotal) 285 | 286 | (loss, new BDV[Double](gradientTotal.data)) 287 | } 288 | } 289 | } 290 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/Optimizer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import org.apache.spark.ml.linalg.Vector 21 | import org.apache.spark.rdd.RDD 22 | 23 | import scaladl.layers.AnnTypes.Tensor 24 | 25 | trait Optimizer extends Serializable { 26 | 27 | /** 28 | * Solve the provided convex optimization problem. 29 | */ 30 | def optimize(data: RDD[(Double, Vector)], initialWeights: Tensor): Tensor 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/optimization/Updater.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.optimization 19 | 20 | import scaladl.layers.AnnTypes.Tensor 21 | 22 | abstract class Updater extends Serializable { 23 | /** 24 | * Compute an updated value for weights given the gradient, stepSize, iteration number and 25 | * regularization parameter. Also returns the regularization value regParam * R(w) 26 | * computed using the *updated* weights. 27 | * 28 | * @param weightsOld - Column matrix of size dx1 where d is the number of features. 29 | * @param gradient - Column matrix of size dx1 where d is the number of features. 30 | * @param stepSize - step size across iterations 31 | * @param iter - Iteration number 32 | * @param regParam - Regularization parameter 33 | * @return A tuple of 2 elements. The first element is a column matrix containing updated weights, 34 | * and the second element is the regularization value computed using updated weights. 35 | */ 36 | def compute( 37 | weightsOld: Tensor, 38 | gradient: Tensor, 39 | stepSize: Double, 40 | iter: Int, 41 | regParam: Double): (Tensor, Double) 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/main/scala/scaladl/tensor/DenseTensor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.tensor 19 | 20 | import scala.collection.mutable.ArrayBuffer 21 | import scala.reflect.ClassTag 22 | 23 | import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS} 24 | 25 | object Algebra { 26 | trait NumberLike[@specialized (Double, Float) T] extends Serializable { 27 | def plus(x: T, y: T): T 28 | def minus(x: T, y: T): T 29 | def times(x: T, y: T): T 30 | def sqrt(x: T): T 31 | } 32 | object NumberLike { 33 | implicit object NumberLikeDouble extends NumberLike[Double] { 34 | def plus(x: Double, y: Double): Double = x + y 35 | def minus(x: Double, y: Double): Double = x - y 36 | def times(x: Double, y: Double): Double = x * y 37 | def sqrt(x: Double): Double = math.sqrt(x) 38 | } 39 | implicit object NumberLikeFloat extends NumberLike[Float] { 40 | def plus(x: Float, y: Float): Float = x + y 41 | def minus(x: Float, y: Float): Float = x - y 42 | def times(x: Float, y: Float): Float = x * y 43 | def sqrt(x: Float): Float = math.sqrt(x.toDouble).toFloat 44 | } 45 | } 46 | } 47 | import Algebra.NumberLike 48 | 49 | /** 50 | * Dense tensor column-major representation. // TODO: row major?? 51 | * 52 | * @param data underlying data 53 | * @param tensorShape shape of tensor 54 | * @param offset offset in the data 55 | * @tparam T type 56 | */ 57 | class DenseTensor[@specialized(Double, Float) T] ( 58 | val data: Array[T], 59 | val tensorShape: Array[Int], 60 | val offset: Int, 61 | isTransposed: Boolean = false)(implicit numOps: NumberLike[T]) extends Serializable { 62 | 63 | private var actualSize: Int = 0 64 | private var majorStride: Int = 0 65 | private var requiredSize: Int = 0 66 | // Fix of the Scala specialized constructor bug: 67 | // http://axel22.github.io/2013/11/03/specialization-quirks.html 68 | protected def init(data: Array[T], tensorShape: Array[Int]): Unit = { 69 | actualSize = data.length - offset 70 | majorStride = if (isTransposed) tensorShape.last else tensorShape.head 71 | requiredSize = tensorShape.product 72 | } 73 | init(data, tensorShape) 74 | // TODO: figure out which of size, shape etc can be removed or replaced in other functions 75 | // private val actualSize = data.size//data.length - offset 76 | // // Major stride (always the first??? dimension since stored in columnar format) 77 | // private val majorStride = if (isTransposed) tensorShape.last else tensorShape.head 78 | // private val requiredSize = tensorShape.product 79 | require(requiredSize <= actualSize, 80 | "Actual size of the array does not correspond to dimension Sizes") 81 | private var myShape = tensorShape 82 | 83 | /** 84 | * Allocate new tensor 85 | * @param tensorShape tensor shape 86 | * @param m type parameter 87 | * @param numOps ops parameter 88 | */ 89 | def this(tensorShape: Array[Int])(implicit m: ClassTag[T], numOps: NumberLike[T]) = { 90 | this(new Array[T](tensorShape.product), tensorShape, 0) 91 | } 92 | 93 | /** 94 | * New tensor given data and shape 95 | * @param data data array 96 | * @param tensorShape shape 97 | * @param m type 98 | * @param numOps ops 99 | */ 100 | def this(data: Array[T], tensorShape: Array[Int]) 101 | (implicit m: ClassTag[T], numOps: NumberLike[T]) = { 102 | this(data, tensorShape, 0, false) 103 | } 104 | 105 | /** 106 | * Don't use this in loops!!! 
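 * It is recomputed as the product of the current shape on every call, so hoist it into
 * a local val before a tight loop.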
107 | * @return 108 | */ 109 | def size: Int = myShape.product 110 | /** 111 | * Shape of the tensor 112 | * 113 | * @return shape 114 | */ 115 | def shape: Array[Int] = myShape 116 | 117 | /** 118 | * Reshape the tensor. Supports reshaping within the same data size 119 | * 120 | * @param newShape new shape 121 | * @return reshaped tensor backed by the same data 122 | */ 123 | def reshape(newShape: Array[Int]): DenseTensor[T] = { 124 | val requiredSize = newShape.product 125 | require(requiredSize == actualSize) 126 | myShape = newShape 127 | this 128 | } 129 | 130 | /** 131 | * Update value of a Tensor 132 | * 133 | * @param index index 134 | * @param value value 135 | */ 136 | def update(index: Int, value: T): Unit = { 137 | require(index >=0 && index < requiredSize) 138 | data(this.offset + index) = value 139 | } 140 | 141 | /** 142 | * Update value of a Tensor 143 | * 144 | * @param index index 145 | * @param value value 146 | */ 147 | def update(index: Array[Int], value: T): Unit = { 148 | data(offset(index)) = value 149 | } 150 | 151 | /** 152 | * Get the value at position index 153 | * 154 | * @param index index 155 | * @return value 156 | */ 157 | def value(index: Int): T = { 158 | require(index >=0 && index < requiredSize) 159 | data(this.offset + index) 160 | } 161 | 162 | /** 163 | * Get the value at position index 164 | * 165 | * @param index index 166 | * @return value 167 | */ 168 | def value(index: Array[Int]): T = { 169 | data(offset(index)) 170 | } 171 | 172 | private def offset(index: Array[Int]): Int = { 173 | var offset = index.last 174 | for (i <- myShape.length - 1 to 1 by -1) { 175 | offset = index(i - 1) + myShape(i - 1) * offset 176 | } 177 | offset 178 | } 179 | 180 | /** 181 | * Check if tensor is transposed 182 | * 183 | * @return true if transposed, false otherwise 184 | */ 185 | def transposed: Boolean = isTransposed 186 | 187 | /** 188 | * Transpose tensor. Does not actually transpose the data. 189 | * It is used for operations such as gemm. 
190 | * 191 | * @return self 192 | */ 193 | def transpose(implicit m: ClassTag[T]): DenseTensor[T] = { 194 | require(tensorShape.length == 2, "Transpose is valid only for 2 dimensional tensor") 195 | val transposedTensor = DenseTensor[T](data, tensorShape.reverse, offset, true) 196 | transposedTensor 197 | } 198 | 199 | /** 200 | * Slice the tensor by the last dimension 201 | * 202 | * @param from index 203 | * @param until index 204 | * @return tensor backed by the same data 205 | */ 206 | def slice(from: Int, until: Int): DenseTensor[T] = { 207 | require(from < until && from < myShape(0) && until <= myShape(0), 208 | "start and end must be within the size of first dimension, also start <= end") 209 | val shapeInit = myShape.init 210 | val lastDimensionNewSize = until - from 211 | val startOffset = offset(shapeInit.map(_ => 0) :+ from) 212 | new DenseTensor[T](data, shapeInit :+ lastDimensionNewSize, startOffset) 213 | } 214 | 215 | /** 216 | * Slice the tensor by one index in the last dimension 217 | * 218 | * @param index index 219 | * @return squeezed tensor 220 | */ 221 | def slice(index: Int): DenseTensor[T] = { 222 | slice(index, index + 1).squeeze() 223 | } 224 | 225 | /** 226 | * Squeze the dimensions of size 1 227 | * 228 | * @return tensor backed by the same data 229 | */ 230 | def squeeze(): DenseTensor[T] = { 231 | val buf = new ArrayBuffer[Int](myShape.length) 232 | for (dim <- myShape) { 233 | if (dim > 1) buf += dim 234 | } 235 | myShape = buf.toArray 236 | this 237 | } 238 | 239 | /** 240 | * Copy the underlying data 241 | * 242 | * @param m ClassTag 243 | * @return data array 244 | */ 245 | def copyData()(implicit m: ClassTag[T]): Array[T] = { 246 | val array = new Array[T](myShape.product) 247 | System.arraycopy(data, offset, array, 0, array.length) 248 | array 249 | } 250 | 251 | def copy()(implicit m: ClassTag[T]): DenseTensor[T] = { 252 | val array = new Array[T](myShape.product) 253 | System.arraycopy(data, offset, array, 0, array.length) 254 | new DenseTensor(array, myShape, offset, isTransposed) 255 | } 256 | 257 | /** 258 | * Fill tensor with the data from the other tensor 259 | * 260 | * @param donor tensor from which to get data 261 | * @return self 262 | */ 263 | def fillWith(donor: DenseTensor[T]): DenseTensor[T] = { 264 | require(size % donor.size == 0 && size >= donor.size, 265 | "data size of recipient tensor must be >= and divide evenly by the data size of donor tensor") 266 | val donorSize = donor.size 267 | val numCopies = size / donorSize 268 | var k = 0 269 | var nextOffset = 0 270 | while (k < numCopies) { 271 | System.arraycopy(donor.data, donor.offset, this.data, this.offset + nextOffset, donorSize) 272 | nextOffset += donorSize 273 | k += 1 274 | } 275 | this 276 | } 277 | 278 | /** 279 | * Plus operation 280 | * @param other other tensor 281 | * @param m type parameter 282 | * @return returns new tensor 283 | */ 284 | def +(other: DenseTensor[T])(implicit m: ClassTag[T]): DenseTensor[T] = { 285 | require(equalShape(other), "Must be equal shape") 286 | val sz = size 287 | val newData = new Array[T](sz) 288 | var thisIndex = this.offset 289 | var otherIndex = other.offset 290 | var i = 0 291 | while (i < sz) { 292 | newData(i) = numOps.plus(this.data(thisIndex), other.data(otherIndex)) 293 | thisIndex += 1 294 | otherIndex += 1 295 | i += 1 296 | } 297 | DenseTensor(newData, shape.clone()) 298 | } 299 | 300 | /** 301 | * Minus operation 302 | * @param other other tensor 303 | * @param m type parameter 304 | * @return returns new tensor 305 | */ 306 
| def -(other: DenseTensor[T])(implicit m: ClassTag[T]): DenseTensor[T] = { 307 | require(equalShape(other), "Must be equal shape") 308 | val sz = size 309 | val newData = new Array[T](sz) 310 | var i = 0 311 | while (i < sz) { 312 | newData(i) = numOps.minus(this.data(this.offset + i), other.data(other.offset + i)) 313 | i += 1 314 | } 315 | DenseTensor(newData, shape.clone()) 316 | } 317 | 318 | /** 319 | * Elementwise multiplication 320 | * @param other other tensor 321 | * @param m type parameter 322 | * @return returns new tensor 323 | */ 324 | def :*(other: DenseTensor[T])(implicit m: ClassTag[T]): DenseTensor[T] = { 325 | require(equalShape(other), "Must be equal shape") 326 | val sz = size 327 | val newData = new Array[T](sz) 328 | var i = 0 329 | while (i < sz) { 330 | newData(i) = numOps.times(this.data(this.offset + i), other.data(other.offset + i)) 331 | i += 1 332 | } 333 | DenseTensor(newData, shape.clone()) 334 | } 335 | 336 | // TODO: fix this ugliness 337 | private def equalShape(other: DenseTensor[T]): Boolean = { 338 | val thisShape = this.shape 339 | val otherShape = other.shape 340 | if (thisShape.length != otherShape.length) { 341 | return false 342 | } else { 343 | var i = 0 344 | while (i < thisShape.length) { 345 | if (thisShape(i) != otherShape(i)) { 346 | return false 347 | } 348 | i += 1 349 | } 350 | } 351 | true 352 | } 353 | 354 | /** 355 | * Sum of the elements 356 | * @return sum 357 | */ 358 | def sum: T = { 359 | var i = offset 360 | var mySum = numOps.minus(data(i), data(i)) 361 | val max = offset + size 362 | while (i < max) { 363 | mySum = numOps.plus(mySum, data(i)) 364 | i += 1 365 | } 366 | mySum 367 | } 368 | 369 | /** 370 | * Norm of the vector 371 | * @return norm 372 | */ 373 | def norm: T = { 374 | var i = offset 375 | var mySum = numOps.minus(data(i), data(i)) 376 | val max = offset + size 377 | while (i < max) { 378 | mySum = numOps.plus(mySum, numOps.times(data(i), data(i))) 379 | i += 1 380 | } 381 | numOps.sqrt(mySum) 382 | } 383 | 384 | /** 385 | * Equals for transposed, shape and data 386 | * @param other tensor 387 | * @return true if equal, false overwise 388 | */ 389 | def isEqual(other: DenseTensor[T]): Boolean = { 390 | if (this.transposed != other.transposed || !equalShape(other)) { 391 | return false 392 | } else { 393 | var i = 0 394 | while (i < data.length) { 395 | if (data(i) != other.data(i)) { 396 | return false 397 | } 398 | i += 1 399 | } 400 | } 401 | true 402 | } 403 | 404 | override def toString(): String = { 405 | // TODO: implement row-by-row print 406 | val buf = new StringBuilder() 407 | for (i <- offset until offset + size) { 408 | var product: Int = 1 409 | val index = new Array[Int](myShape.length) 410 | for (dim <- 0 until myShape.length - 1) { 411 | val dimValue = (i / product) % myShape(dim) 412 | product *= myShape(dim) 413 | index(dim) = dimValue 414 | } 415 | index(myShape.length - 1) = i / product 416 | buf.append(value(index)) 417 | buf.append(" ") 418 | } 419 | buf.toString() 420 | } 421 | 422 | } 423 | 424 | object DenseTensor { 425 | 426 | /** 427 | * Create a tensor with zeros 428 | * 429 | * @param tensorShape shape 430 | * @param m ClassTag 431 | * @tparam T implicit type 432 | * @return tensor 433 | */ 434 | def apply[@specialized(Double, Float) T](tensorShape: Array[Int]) 435 | (implicit m: ClassTag[T], 436 | numOps: NumberLike[T]): DenseTensor[T] = { 437 | val data: Array[T] = new Array[T](tensorShape.product) 438 | DenseTensor(data, tensorShape) 439 | } 440 | 441 | /** 442 | * Create a tensor 
from data 443 | * 444 | * @param data data 445 | * @param tensorShape shape 446 | * @param offset offset in the data 447 | * @param m ClassTag 448 | * @tparam T implicit type 449 | * @return tensor 450 | */ 451 | def apply[@specialized(Double, Float) T]( 452 | data: Array[T], 453 | tensorShape: Array[Int], 454 | offset: Int = 0, 455 | isTransposed: Boolean = false) 456 | (implicit m: ClassTag[T], numOps: NumberLike[T]): DenseTensor[T] = { 457 | new DenseTensor[T](data, tensorShape, offset, isTransposed) 458 | } 459 | 460 | /** 461 | * Create and fill tensor with values 462 | * 463 | * @param tensorShape shape 464 | * @param elem value 465 | * @param m ClassTag 466 | * @tparam T type 467 | * @return tensor 468 | */ 469 | def fill[@specialized(Double, Float) T](tensorShape: Array[Int]) 470 | (elem: => T) 471 | (implicit m: ClassTag[T], 472 | numOps: NumberLike[T]): DenseTensor[T] = { 473 | val data: Array[T] = Array.fill[T](tensorShape.product)(elem) 474 | DenseTensor(data, tensorShape) 475 | } 476 | 477 | /** 478 | * Apply a function to tensor x in place 479 | * 480 | * @param x source 481 | * @param func function 482 | * @tparam T type 483 | */ 484 | def applyFunction[@specialized(Double, Float) T](x: DenseTensor[T], func: T => T) 485 | (implicit m: ClassTag[T], numOps: NumberLike[T]): Unit = { 486 | var i = x.offset 487 | val sz = x.offset + x.size 488 | while (i < sz) { 489 | x.data(i) = func(x.data(i)) 490 | i += 1 491 | } 492 | } 493 | 494 | /** 495 | * Apply a function to tensor x and put the result in the y 496 | * 497 | * @param x source 498 | * @param y result 499 | * @param func function 500 | * @tparam T type 501 | */ 502 | def applyFunction[@specialized(Double, Float) T](x: DenseTensor[T], 503 | y: DenseTensor[T], 504 | func: T => T) 505 | (implicit m: ClassTag[T], 506 | numOps: NumberLike[T]): Unit = { 507 | require(x.size == y.size, "Tensor sizes must be equal") 508 | var i = 0 509 | val sz = y.size 510 | while (i < sz) { 511 | y.data(y.offset + i) = func(x.data(x.offset + i)) 512 | i += 1 513 | } 514 | } 515 | 516 | /** 517 | * Apply a function elementwise to tensors x1 and x2 and put the result in y 518 | * 519 | * @param x1 source1 520 | * @param x2 source2 521 | * @param y result 522 | * @param func function 523 | * @tparam T type 524 | */ 525 | def applyFunction[@specialized(Double, Float) T]( 526 | x1: DenseTensor[T], 527 | x2: DenseTensor[T], 528 | y: DenseTensor[T], 529 | func: (T, T) => T)(implicit m: ClassTag[T], numOps: NumberLike[T]): Unit = { 530 | require(x1.size == y.size && x2.size == y.size, "Tensor sizes must be equal") 531 | var i = 0 532 | val sz = y.size 533 | while (i < sz) { 534 | y.data(y.offset + i) = func(x1.data(x1.offset + i), x2.data(x2.offset + i)) 535 | i += 1 536 | } 537 | } 538 | 539 | /** 540 | * Double 2d tensor multiplication C <- alpha * A * B + beta * C 541 | * 542 | * @param alpha alpha 543 | * @param a A 544 | * @param b B 545 | * @param beta beta 546 | * @param c C 547 | */ 548 | def gemm( 549 | alpha: Double, 550 | a: DenseTensor[Double], 551 | b: DenseTensor[Double], 552 | beta: Double, 553 | c: DenseTensor[Double]): Unit = { 554 | // TODO: case with 3d and more 555 | require(a.shape.length == 2 && b.shape.length == 2 && c.shape.length == 2, 556 | "A, B, or C are not 2d tensors") 557 | // TODO: add code if matrices isTranspose!!! 
558 | require(a.shape(1) == b.shape(0), "A & B Dimension mismatch!") 559 | require(a.shape(0) == c.shape(0), "A & C Dimension mismatch!") 560 | require(b.shape(1) == c.shape(1), "B & C Dimension mismatch!") 561 | NativeBLAS.dgemm(transposeString(a), transposeString(b), c.shape(0), c.shape(1), a.shape(1), 562 | // TODO: check majorStride 563 | alpha, a.data, a.offset, a.majorStride, 564 | b.data, b.offset, b.majorStride, 565 | beta, c.data, c.offset, c.shape(0)) 566 | } 567 | 568 | /** 569 | * Double 2d tensor multiplication C <- alpha * A * B + beta * C 570 | * 571 | * @param alpha alpha 572 | * @param a A 573 | * @param b B 574 | * @param beta beta 575 | * @param c C 576 | */ 577 | def gemm( 578 | alpha: Float, 579 | a: DenseTensor[Float], 580 | b: DenseTensor[Float], 581 | beta: Float, 582 | c: DenseTensor[Float]): Unit = { 583 | // TODO: case with 3d and more 584 | require(a.shape.length == 2 && b.shape.length == 2 && c.shape.length == 2, 585 | "A, B, or C are not 2d tensors") 586 | // TODO: add code if matrices isTranspose!!! 587 | require(a.shape(1) == b.shape(0), "A & B Dimension mismatch!") 588 | require(a.shape(0) == c.shape(0), "A & C Dimension mismatch!") 589 | require(b.shape(1) == c.shape(1), "B & C Dimension mismatch!") 590 | NativeBLAS.sgemm(transposeString(a), transposeString(b), c.shape(0), c.shape(1), a.shape(1), 591 | // TODO: check majorStride 592 | alpha, a.data, a.offset, a.majorStride, 593 | b.data, b.offset, b.majorStride, 594 | beta, c.data, c.offset, c.shape(0)) 595 | } 596 | 597 | private def transposeString[T](a: DenseTensor[T]): String = if (a.transposed) "T" else "N" 598 | 599 | /** 600 | * GEMV: y := alpha * A * x + beta * y 601 | * 602 | * @param alpha alpha 603 | * @param a A 604 | * @param x x 605 | * @param beta beta 606 | * @param y y 607 | */ 608 | def gemv( 609 | alpha: Double, 610 | a: DenseTensor[Double], 611 | x: DenseTensor[Double], 612 | beta: Double, 613 | y: DenseTensor[Double]): Unit = { 614 | require(a.shape.length == 2 && x.shape.length == 1 && y.shape.length == 1, 615 | "A must be 2d and X, Y - 1d tensors") 616 | require(a.shape(1) == x.shape(0), "A & X Dimension mismatch!") 617 | require(a.shape(0) == y.shape(0), "A & Y Dimension mismatch!") 618 | NativeBLAS.dgemv(transposeString(a), a.shape(0), a.shape(1), 619 | alpha, a.data, a.offset, a.shape(0), 620 | x.data, x.offset, 1, 621 | beta, y.data, y.offset, 1) 622 | } 623 | 624 | /** 625 | * GEMV: y := alpha * A * x + beta * y 626 | * 627 | * @param alpha alpha 628 | * @param a A 629 | * @param x x 630 | * @param beta beta 631 | * @param y y 632 | */ 633 | def gemv( 634 | alpha: Float, 635 | a: DenseTensor[Float], 636 | x: DenseTensor[Float], 637 | beta: Float, 638 | y: DenseTensor[Float]): Unit = { 639 | require(a.shape.length == 2 && x.shape.length == 1 && y.shape.length == 1, 640 | "A must be 2d and X, Y - 1d tensors") 641 | require(a.shape(1) == x.shape(0), "A & X Dimension mismatch!") 642 | require(a.shape(0) == y.shape(0), "A & Y Dimension mismatch!") 643 | NativeBLAS.sgemv(transposeString(a), a.shape(0), a.shape(1), 644 | alpha, a.data, a.offset, a.shape(0), 645 | x.data, x.offset, 1, 646 | beta, y.data, y.offset, 1) 647 | } 648 | 649 | /** 650 | * y := alpha * x + y 651 | * 652 | * @param alpha alpha 653 | * @param x vector x 654 | * @param y vector y 655 | */ 656 | def axpy(alpha: Double, x: DenseTensor[Double], y: DenseTensor[Double]): Unit = { 657 | require(x.size == y.size, "x and y sizes equals") 658 | val n = x.size 659 | NativeBLAS.daxpy(n, alpha, x.data, 1, y.data, 1) 660 | } 
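  // A minimal usage sketch of the wrappers in this object (it mirrors the dgemm and axpy
  // tests in DenseTensorSuite, assuming the column-major layout used throughout this file):
  //   val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) // 2x3 matrix
  //   val b = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(3, 2)) // 3x2 matrix
  //   val c = DenseTensor[Double](Array(2, 2))                                  // 2x2 result
  //   DenseTensor.gemm(1.0, a, b, 0.0, c) // c := a * b, so c.data becomes (22, 28, 49, 64)
  //   val x = DenseTensor[Double](Array[Double](0.5, 1.0, 1.5, 2.0), Array(4))
  //   val y = DenseTensor[Double](Array[Double](1, 2, 3, 4), Array(4))
  //   DenseTensor.axpy(2.0, x, y)         // y := 2 * x + y, so y.data becomes (2, 4, 6, 8)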
661 | 662 | /** 663 | * y := alpha * x + y 664 | * 665 | * @param alpha alpha 666 | * @param x vector x 667 | * @param y vector y 668 | */ 669 | def axpy(alpha: Float, x: DenseTensor[Float], y: DenseTensor[Float]): Unit = { 670 | require(x.size == y.size, "x and y sizes equals") 671 | val n = x.size 672 | NativeBLAS.saxpy(n, alpha, x.data, 1, y.data, 1) 673 | } 674 | 675 | /** 676 | * x := alpha * x 677 | * @param alpha alpha 678 | * @param x vector x 679 | */ 680 | def scal(alpha: Double, x: DenseTensor[Double]): Unit = { 681 | val n = x.size 682 | NativeBLAS.dscal(n, alpha, x.data, x.offset, 1) 683 | } 684 | 685 | /** 686 | * x := alpha * x 687 | * @param alpha alpha 688 | * @param x x 689 | */ 690 | def scal(alpha: Float, x: DenseTensor[Float]): Unit = { 691 | val n = x.size 692 | NativeBLAS.sscal(n, alpha, x.data, x.offset, 1) 693 | } 694 | 695 | protected def elementwise( 696 | a: DenseTensor[Double], 697 | b: DenseTensor[Double], 698 | op: (Double, Double) => Double): Unit = { 699 | require(a.size == b.size, "Tensors of different size") 700 | var i = 0 701 | val sz = a.size 702 | while (i < sz) { 703 | a.data(i) = op(a.data(i), b.data(i)) 704 | i += 1 705 | } 706 | } 707 | 708 | /** 709 | * Elementwise product a := a * b 710 | * 711 | * @param a vector a 712 | * @param b vector b 713 | */ 714 | def elementwiseProduct(a: DenseTensor[Double], b: DenseTensor[Double]): Unit = { 715 | elementwise(a, b, (x, y) => x * y) 716 | } 717 | } 718 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/ml/scaladl/ANNSpeedSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import org.apache.spark.ml.classification.{MultilayerPerceptronClassifier => SMLP} 21 | import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator 22 | import org.apache.spark.ml.scaladl.{MultilayerPerceptronClassifier => TMLP} 23 | import org.scalatest.FunSuite 24 | 25 | import scaladl.util.SparkTestContext 26 | 27 | class ANNSpeedSuite extends FunSuite with SparkTestContext { 28 | 29 | // test ("speed test") { 30 | // val mnistPath = System.getenv("MNIST_HOME") 31 | // println(mnistPath + "/mnist.scale") 32 | // val dataFrame = sqlContext. 
33 | // createDataFrame(MLUtils.loadLibSVMFile(sc, mnistPath + "/mnist.scale", 784)).persist() 34 | // dataFrame.count() 35 | // val mlp = new MultilayerPerceptronClassifier().setLayers(Array(784, 32, 10)) 36 | // .setTol(10e-9) 37 | // .setMaxIter(20) 38 | // .setSeed(1234L) 39 | // val t = System.nanoTime() 40 | // val model = mlp.fit(dataFrame) 41 | // val total = System.nanoTime() - t 42 | // println("Total time: " + total / 1e9 + " s. (should be ~42s. without native BLAS") 43 | // val test = sqlContext. 44 | // createDataFrame(MLUtils.loadLibSVMFile(sc, mnistPath + "/mnist.scale.t", 784)).persist() 45 | // test.count() 46 | // val result = model.transform(test) 47 | // val pl = result.select("prediction", "label") 48 | // val ev = new MulticlassClassificationEvaluator().setMetricName("precision") 49 | // println("Accuracy: " + ev.evaluate(pl)) 50 | // } 51 | 52 | test ("speed test with tensor (native BLAS and MNIST_HOME needs to be configured") { 53 | val mnistPath = System.getenv("MNIST_HOME") 54 | val dataFrame = spark 55 | .read 56 | .format("libsvm") 57 | .option("numFeatures", 784) 58 | .load(mnistPath + "/mnist.scale") 59 | .persist() 60 | dataFrame.count() 61 | val layers = Array(784, 100, 10) 62 | val maxIter = 20 63 | val tol = 1e-9 64 | val warmUp = new SMLP().setLayers(layers) 65 | .setTol(10e-9) 66 | .setMaxIter(1) 67 | .setSeed(1234L) 68 | .fit(dataFrame) 69 | val weights = warmUp.weights 70 | 71 | val mlp = new SMLP().setLayers(layers) 72 | .setTol(tol) 73 | .setMaxIter(maxIter) 74 | .setInitialWeights(weights.copy) 75 | val t = System.nanoTime() 76 | val model = mlp.fit(dataFrame) 77 | val total = System.nanoTime() - t 78 | val tensorMLP = new TMLP().setLayers(layers) 79 | .setTol(tol) 80 | .setMaxIter(maxIter) 81 | .setInitialWeights(weights.copy) 82 | val tTensor = System.nanoTime() 83 | val tModel = tensorMLP.fit(dataFrame) 84 | val totalTensor = System.nanoTime() - tTensor 85 | // time is 49.9 s on my machine 86 | assert(math.abs(totalTensor - total) / 1e9 < 0.15 * total /1e9, 87 | "Training time of tensor version should differ no more than 15% s. from original version") 88 | val test = spark 89 | .read 90 | .format("libsvm") 91 | .option("numFeatures", 784) 92 | .load(mnistPath + "/mnist.scale.t") 93 | .persist() 94 | test.count() 95 | val result = model.transform(test) 96 | val pl = result.select("prediction", "label") 97 | val ev = new MulticlassClassificationEvaluator().setMetricName("accuracy") 98 | val tResult = tModel.transform(test) 99 | val tpl = tResult.select("prediction", "label") 100 | val tev = new MulticlassClassificationEvaluator().setMetricName("accuracy") 101 | assert(tev.evaluate(tpl) == ev.evaluate(pl), "Accuracies must be equal") 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/ml/scaladl/MultilayerPerceptronClassifierSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import org.apache.spark.ml.linalg.Vectors 21 | import org.apache.spark.sql.Row 22 | import org.scalatest.FunSuite 23 | 24 | import scaladl.util.SparkTestContext 25 | 26 | class MultilayerPerceptronClassifierSuite extends FunSuite with SparkTestContext { 27 | 28 | test("XOR function learning as binary classification problem with two outputs.") { 29 | val dataFrame = spark.createDataFrame(Seq( 30 | (Vectors.dense(0.0, 0.0), 0.0), 31 | (Vectors.dense(0.0, 1.0), 1.0), 32 | (Vectors.dense(1.0, 0.0), 1.0), 33 | (Vectors.dense(1.0, 1.0), 0.0)) 34 | ).toDF("features", "label") 35 | val layers = Array[Int](2, 5, 2) 36 | val trainer = new MultilayerPerceptronClassifier() 37 | .setLayers(layers) 38 | .setBlockSize(1) 39 | .setSeed(123L) 40 | .setMaxIter(100) 41 | val model = trainer.fit(dataFrame) 42 | val result = model.transform(dataFrame) 43 | val predictionAndLabels = result.select("prediction", "label").collect() 44 | predictionAndLabels.foreach { case Row(p: Double, l: Double) => 45 | assert(p == l) 46 | } 47 | } 48 | 49 | test("Test setWeights by training restart") { 50 | val dataFrame = spark.createDataFrame(Seq( 51 | (Vectors.dense(0.0, 0.0), 0.0), 52 | (Vectors.dense(0.0, 1.0), 1.0), 53 | (Vectors.dense(1.0, 0.0), 1.0), 54 | (Vectors.dense(1.0, 1.0), 0.0)) 55 | ).toDF("features", "label") 56 | val layers = Array[Int](2, 5, 2) 57 | val trainer = new MultilayerPerceptronClassifier() 58 | .setLayers(layers) 59 | .setBlockSize(1) 60 | .setSeed(123456L) 61 | .setMaxIter(1) 62 | .setTol(1e-6) 63 | val initialWeights = trainer.fit(dataFrame).weights 64 | trainer.setInitialWeights(initialWeights.copy) 65 | val weights1 = trainer.fit(dataFrame).weights 66 | trainer.setInitialWeights(initialWeights.copy) 67 | val weights2 = trainer.fit(dataFrame).weights 68 | weights1.toArray.zip(weights2.toArray).foreach { x => 69 | assert(math.abs(x._1 - x._2) <= 10e-5, 70 | "Training should produce the same weights given equal initial weights and number of steps") 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/ml/scaladl/StackedAutoencoderSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.ml.scaladl 19 | 20 | import org.apache.spark.ml.linalg.{Vector, Vectors} 21 | import org.apache.spark.sql.Row 22 | import org.scalatest.FunSuite 23 | 24 | import scaladl.util.SparkTestContext 25 | 26 | class StackedAutoencoderSuite extends FunSuite with SparkTestContext { 27 | 28 | // using data similar to https://inst.eecs.berkeley.edu/~cs182/sp08/assignments/a3-tlearn.html 29 | val binaryData = Seq( 30 | Vectors.dense(Array(1.0, 0.0, 0.0, 0.0)), 31 | Vectors.dense(Array(0.0, 1.0, 0.0, 0.0)), 32 | Vectors.dense(Array(0.0, 0.0, 1.0, 0.0)), 33 | Vectors.dense(Array(0.0, 0.0, 0.0, 1.0))) 34 | 35 | val real01Data = Seq( 36 | Vectors.dense(Array(0.5, 0.1, 0.1, 0.1)), 37 | Vectors.dense(Array(0.1, 0.6, 0.5, 0.5)), 38 | Vectors.dense(Array(0.5, 0.5, 0.5, 0.5)), 39 | Vectors.dense(Array(0.9, 0.9, 0.9, 0.9))) 40 | 41 | val realData = Seq( 42 | Vectors.dense(Array(10.0, 0.0, 0.0, 0.0)), 43 | Vectors.dense(Array(0.0, 1.0, 0.0, 0.0)), 44 | Vectors.dense(Array(0.0, 0.0, 10.0, 0.0)), 45 | Vectors.dense(Array(0.0, 0.0, 0.0, 10.0))) 46 | 47 | test("Autoencoder reconstructs the original data by encoding and decoding") { 48 | val dataSets = Seq(binaryData, real01Data, realData) 49 | val dataTypes = Seq(true, true, false) 50 | val dataSetAndTypes = dataSets.zip(dataTypes) 51 | for ((data, is01) <- dataSetAndTypes) { 52 | val rdd = sc.parallelize(data, 1).map(x => Tuple1(x)) 53 | val df = spark.createDataFrame(rdd).toDF("input") 54 | val stackedAutoencoder = new StackedAutoencoder() 55 | .setLayers(Array(4, 3, 3)) 56 | .setBlockSize(1) 57 | .setMaxIter(100) 58 | .setSeed(123456789L) 59 | .setTol(1e-6) 60 | .setInputCol("input") 61 | .setOutputCol("output") 62 | .setDataIn01Interval(is01) 63 | .setBuildDecoder(true) 64 | // TODO: find a way to inherit the input and output parameter value from estimator 65 | val saModel = stackedAutoencoder.fit(df) 66 | saModel.setInputCol("input").setOutputCol("encoded") 67 | // encoding 68 | val encodedData = saModel.transform(df) 69 | // decoding 70 | saModel.setInputCol("encoded").setOutputCol("decoded") 71 | val decodedData = saModel.decode(encodedData) 72 | // epsilon == 1/100 of the maximum value 73 | val eps = if (is01) 1.0 / 100 else 10.0 / 100 74 | decodedData.collect.foreach { case Row(input: Vector, _: Vector, decoded: Vector) => 75 | input.toArray.zip(decoded.toArray).foreach { x => 76 | assert(math.abs(x._1 - x._2) <= eps, 77 | "Decoder should produce vectors close to the input") 78 | } 79 | } 80 | } 81 | } 82 | 83 | test("Autoencoder use for pre-training") { 84 | val seed = 123456789L 85 | val numIter = 20 86 | val dataFrame = spark.createDataFrame(Seq( 87 | (Vectors.dense(0.0, 0.0), 0.0), 88 | (Vectors.dense(0.0, 1.0), 1.0), 89 | (Vectors.dense(1.0, 0.0), 1.0), 90 | (Vectors.dense(1.0, 1.0), 0.0)) 91 | ).toDF("features", "label") 92 | val layers = Array[Int](2, 7, 6, 5, 4, 3, 2) 93 | val trainer = new MultilayerPerceptronClassifier() 94 | .setLayers(layers) 95 | .setBlockSize(1) 96 | .setSeed(seed) 97 | .setMaxIter(1) 98 | .setTol(1e-6) 99 | val initialWeights = trainer.fit(dataFrame).weights 100 | trainer 101 | .setInitialWeights(initialWeights.copy) 102 | .setMaxIter(numIter) 103 | val badModel = trainer.fit(dataFrame) 104 | val badResult = badModel.transform(dataFrame) 105 | val badPredictionAndLabels = badResult.select("prediction", "label").collect() 106 | // solution converged to a bad optimum 107 | 
assert(!badPredictionAndLabels.forall { case Row(p: Double, l: Double) => 108 | p == l 109 | }, "Model should not predict as expected") 110 | 111 | // pre-train all layers except last as stacked autoencoder 112 | val encoderLayers = layers.init 113 | val autoEncoder = new StackedAutoencoder("stackedAutoencoder") 114 | .setBlockSize(1) 115 | .setBuildDecoder(false) 116 | .setDataIn01Interval(true) 117 | .setInputCol("features") 118 | .setLayers(encoderLayers) 119 | .setMaxIter(numIter) 120 | .setSeed(seed) 121 | .setTol(1e-6) 122 | val autoEncoderModel = autoEncoder.fit(dataFrame) 123 | val autoEncoderWeights = autoEncoderModel.encoderWeights 124 | // initialize weights for the classifier and copy pre-trained weights 125 | System.arraycopy( 126 | autoEncoderWeights.toArray, 0, initialWeights.toArray, 0, autoEncoderWeights.toArray.length) 127 | val preTrainer = new MultilayerPerceptronClassifier() 128 | .setLayers(layers) 129 | .setBlockSize(1) 130 | .setInitialWeights(initialWeights) 131 | .setMaxIter(numIter) 132 | .setTol(1e-6) 133 | val preModel = preTrainer.fit(dataFrame) 134 | val preResult = preModel.transform(dataFrame) 135 | val predictionAndLabels = preResult.select("prediction", "label").collect() 136 | predictionAndLabels.foreach { case Row(p: Double, l: Double) => 137 | assert(p == l, "Training after pre-training should succeed") 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/layers/GradientSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package scaladl.layers 19 | 20 | import org.apache.spark.ml.linalg.Vectors 21 | import org.scalatest.FunSuite 22 | 23 | import scaladl.layers.AnnTypes._ 24 | import scaladl.tensor.DenseTensor 25 | 26 | class GradientSuite extends FunSuite { 27 | 28 | test("Gradient computation against numerical differentiation") { 29 | val x = DenseTensor[Double](Array(1.0, 1.0, 1.0), Array(3, 1)) 30 | val input = new Tensor(Array(1.0, 1.0, 1.0), Array(3, 1)) 31 | // output must contain zeros and one 1 for SoftMax 32 | val target = new Tensor(Array(0.0, 1.0), Array(2, 1)) 33 | val topology = FeedForwardTopology.multiLayerPerceptron(Array(3, 4, 2), softmaxOnTop = false) 34 | val layersWithErrors = Seq( 35 | new SigmoidLayerWithSquaredError(), 36 | new SoftmaxLayerWithCrossEntropyLoss(), 37 | new SigmoidLayerWithCrossEntropyLoss(), 38 | new EmptyLayerWithSquaredError() 39 | ) 40 | // check all layers that provide loss computation 41 | // 1) compute loss and gradient given the model and initial weights 42 | // 2) modify weights with small number epsilon (per dimension i) 43 | // 3) compute new loss 44 | // 4) ((newLoss - loss) / epsilon) should be close to the i-th component of the gradient 45 | for (layerWithError <- layersWithErrors) { 46 | topology.layers(topology.layers.length - 1) = layerWithError 47 | val model = topology.model(seed = 12L) 48 | val weights = model.weights.toArray 49 | val numWeights = weights.size 50 | val gradient = new Tensor(Array(numWeights)) 51 | val loss = model.computeGradient(input, target, gradient, 1) 52 | val eps = 1e-4 53 | var i = 0 54 | val tol = 1e-4 55 | while (i < numWeights) { 56 | val originalValue = weights(i) 57 | weights(i) += eps 58 | val newModel = topology.model(Vectors.dense(weights)) 59 | val newLoss = computeLoss(input, target, newModel) 60 | val derivativeEstimate = (newLoss - loss) / eps 61 | assert(math.abs(gradient.value(i) - derivativeEstimate) < tol, 62 | "Layer failed gradient check: " + layerWithError.getClass) 63 | weights(i) = originalValue 64 | i += 1 65 | } 66 | } 67 | } 68 | 69 | private def computeLoss(input: Tensor, target: Tensor, model: TopologyModel): Double = { 70 | val outputs = model.forward(input) 71 | model.layerModels.last match { 72 | case layerWithLoss: LossFunction => 73 | layerWithLoss.loss(outputs.last, target, new Tensor(target.shape)) 74 | case _ => 75 | throw new UnsupportedOperationException("Top layer is required to have loss." + 76 | " Failed layer:" + model.layerModels.last.getClass) 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/layers/LayerSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.layers 19 | 20 | import org.apache.spark.ml.linalg.Vectors 21 | import org.scalatest.FunSuite 22 | 23 | import scaladl.util.SparkTestContext 24 | 25 | class LayerSuite extends FunSuite with SparkTestContext { 26 | 27 | // TODO: test for weights comparison with Weka MLP 28 | test("ANN with Sigmoid learns XOR function with LBFGS optimizer") { 29 | val inputs = Array( 30 | Array(0.0, 0.0), 31 | Array(0.0, 1.0), 32 | Array(1.0, 0.0), 33 | Array(1.0, 1.0) 34 | ) 35 | val outputs = Array(0.0, 1.0, 1.0, 0.0) 36 | val data = inputs.zip(outputs).map { case (features, label) => 37 | (Vectors.dense(features), Vectors.dense(label)) 38 | } 39 | val rddData = sc.parallelize(data, 1) 40 | val hiddenLayersTopology = Array(5) 41 | val dataSample = rddData.first() 42 | val layerSizes = dataSample._1.size +: hiddenLayersTopology :+ dataSample._2.size 43 | val topology = FeedForwardTopology.multiLayerPerceptron(layerSizes, false) 44 | val initialWeights = FeedForwardModel(topology, 23124).weights 45 | val trainer = new FeedForwardTrainer(topology, 2, 1) 46 | trainer.setWeights(initialWeights) 47 | trainer.LBFGSOptimizer.setNumIterations(20) 48 | val model = trainer.train(rddData) 49 | val predictionAndLabels = rddData.map { case (input, label) => 50 | (model.predict(input)(0), label(0)) 51 | }.collect() 52 | predictionAndLabels.foreach { case (p, l) => 53 | assert(math.round(p) === l) 54 | } 55 | } 56 | 57 | test("ANN with SoftMax learns XOR function with 2-bit output and batch GD optimizer") { 58 | val inputs = Array( 59 | Array(0.0, 0.0), 60 | Array(0.0, 1.0), 61 | Array(1.0, 0.0), 62 | Array(1.0, 1.0) 63 | ) 64 | val outputs = Array( 65 | Array(1.0, 0.0), 66 | Array(0.0, 1.0), 67 | Array(0.0, 1.0), 68 | Array(1.0, 0.0) 69 | ) 70 | val data = inputs.zip(outputs).map { case (features, label) => 71 | (Vectors.dense(features), Vectors.dense(label)) 72 | } 73 | val rddData = sc.parallelize(data, 1) 74 | val hiddenLayersTopology = Array(5) 75 | val dataSample = rddData.first() 76 | val layerSizes = dataSample._1.size +: hiddenLayersTopology :+ dataSample._2.size 77 | val topology = FeedForwardTopology.multiLayerPerceptron(layerSizes, false) 78 | val initialWeights = FeedForwardModel(topology, 23124).weights 79 | val trainer = new FeedForwardTrainer(topology, 2, 2) 80 | // TODO: add a test for SGD 81 | trainer.LBFGSOptimizer.setConvergenceTol(1e-4).setNumIterations(20) 82 | trainer.setWeights(initialWeights).setStackSize(1) 83 | val model = trainer.train(rddData) 84 | val predictionAndLabels = rddData.map { case (input, label) => 85 | (model.predict(input), label) 86 | }.collect() 87 | predictionAndLabels.foreach { case (p, l) => 88 | p.toArray.zip(l.toArray).foreach(pair => assert(math.abs(pair._1 - pair._2) < 0.5)) 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/tensor/DenseTensorSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.tensor 19 | 20 | import org.scalatest.FunSuite 21 | 22 | class DenseTensorSuite extends FunSuite { 23 | 24 | test ("value") { 25 | val data = Array[Double](1, 2, 3, 4, 5, 6, 7, 8) 26 | val shape2d = Array(4, 2) 27 | val tensor2d = DenseTensor[Double](data, shape2d) 28 | assert(tensor2d.value(Array(2, 1)) == 7.0, "(2, 1) must be 7.0") 29 | val shape3d = Array(2, 2, 2) 30 | val tensor3d = DenseTensor[Double](data, shape3d) 31 | assert(tensor3d.value(Array(1, 1, 1)) == 8.0, "(1, 1, 1) must be 8.0") 32 | } 33 | 34 | test ("slice") { 35 | val data8 = Array[Double](0, 1, 2, 3, 4, 5, 6, 7) 36 | val shape2d = Array(4, 2) 37 | val tensor2d = DenseTensor[Double](data8, shape2d) 38 | val slice2d = tensor2d.slice(1, 2) 39 | assert(slice2d.copyData().deep == data8.slice(4, 8).deep, 40 | "The resulting slice must be (4, 5, 6, 7) ") 41 | val data12 = Array[Double](0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) 42 | val shape3d = Array(2, 2, 3) 43 | val tensor3d = DenseTensor[Double](data12, shape3d) 44 | val slice3d = tensor3d.slice(1, 2) 45 | assert(slice3d.copyData().deep == data12.slice(4, 8).deep, 46 | "The resulting slice must be (4, 5, 6, 7) ") 47 | val shape5d = Array(2, 1, 2, 1, 3) 48 | val tensor5d = DenseTensor[Double](data12, shape5d) 49 | val slice5dto2d = tensor5d.slice(1) 50 | assert(slice5dto2d.copyData().deep == data12.slice(4, 8).deep, 51 | "The resulting slice must be (4, 5, 6, 7) ") 52 | } 53 | 54 | test ("apply function") { 55 | val shape2d = Array(4, 2) 56 | val a = DenseTensor[Double](Array[Double](0, 1, 2, 3, 4, 5, 6, 7), shape2d) 57 | DenseTensor.applyFunction(a, (t: Double) => t * t) 58 | assert(a.copyData().deep == Array[Double](0, 1, 4, 9, 16, 25, 36, 49).deep, 59 | "The result must be (0, 1, 4, 9, 16, 25, 36, 49)") 60 | val x = DenseTensor[Double](Array[Double](0, 1, 2, 3, 4, 5, 6, 7), shape2d) 61 | val y = DenseTensor[Double](shape2d) 62 | def func: (Double) => Double = v => v + 1 63 | DenseTensor.applyFunction[Double](x, y, func) 64 | assert(y.copyData().deep == Array[Double](1, 2, 3, 4, 5, 6, 7, 8).deep, 65 | "The result must be (1, 2, 3, 4, 5, 6, 7, 8)") 66 | val x2 = DenseTensor[Double](Array[Double](0, 1, 2, 3, 4, 5, 6, 7), shape2d) 67 | val x1 = x 68 | val z = DenseTensor[Double](shape2d) 69 | def func2: (Double, Double) => Double = (v1, v2) => v1 + v2 70 | DenseTensor.applyFunction[Double](x1, x2, z, func2) 71 | assert(z.copyData().deep == Array[Double](0, 2, 4, 6, 8, 10, 12, 14).deep, 72 | "The result must be (0, 2, 4, 6, 8, 10, 12, 14)") 73 | } 74 | 75 | test ("fillWith") { 76 | val recipient = DenseTensor[Double](Array(4, 2)) 77 | val donor = DenseTensor[Double](Array[Double](0, 1, 2, 3), Array(4, 1)) 78 | recipient.fillWith(donor) 79 | assert(recipient.copyData().deep == Array[Double](0, 1, 2, 3, 0, 1, 2, 3).deep, 80 | "The result must be (0, 1, 2, 3, 0, 1, 2, 3)") 81 | } 82 | 83 | test ("fill") { 84 | val onesTensor = DenseTensor.fill[Double](Array(1, 2, 1))(1.0) 85 | assert(onesTensor.copyData().forall(x => x == 1.0), "All elements are 1.0") 86 | } 87 | 88 | test ("plus double") { 89 | val x = 
DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 90 | val y = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 91 | val z = x + y 92 | val trueZ = DenseTensor[Double](Array[Double](2, 4, 6, 8, 10, 12), Array(2, 3)) 93 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 94 | } 95 | 96 | test ("plus float") { 97 | val x = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 98 | val y = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 99 | val z = x + y 100 | val trueZ = DenseTensor[Float](Array[Float](2, 4, 6, 8, 10, 12), Array(2, 3)) 101 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 102 | } 103 | 104 | test ("minus double") { 105 | val x = DenseTensor[Double](Array[Double](2, 4, 6, 8, 10, 12), Array(2, 3)) 106 | val y = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 107 | val z = x - y 108 | val trueZ = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 109 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 110 | } 111 | 112 | test ("minus float") { 113 | val x = DenseTensor[Float](Array[Float](2, 4, 6, 8, 10, 12), Array(2, 3)) 114 | val y = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 115 | val z = x - y 116 | val trueZ = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 117 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 118 | } 119 | 120 | test ("elementwise product double") { 121 | val x = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 122 | val y = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 123 | val z = x :* y 124 | val trueZ = DenseTensor[Double](Array[Double](1, 4, 9, 16, 25, 36), Array(2, 3)) 125 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 126 | } 127 | 128 | test ("elementwise product float") { 129 | val x = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 130 | val y = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 131 | val z = x :* y 132 | val trueZ = DenseTensor[Float](Array[Float](1, 4, 9, 16, 25, 36), Array(2, 3)) 133 | assert(z.isEqual(trueZ), "Transposed, shape or data differs") 134 | } 135 | 136 | test ("sum double") { 137 | val x = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 138 | assert(x.sum == 21, "Sum has to be 21") 139 | } 140 | 141 | test ("sum float") { 142 | val x = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 143 | assert(x.sum == 21, "Sum has to be 21") 144 | } 145 | 146 | test ("axpy double precision") { 147 | val alpha = 2 148 | val x = DenseTensor[Double](Array[Double](0.5, 1, 1.5, 2, 2.5, 3), Array(6)) 149 | val y = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(6)) 150 | DenseTensor.axpy(alpha, x, y) 151 | assert(y.copyData().deep == Array[Double](2, 4, 6, 8, 10, 12).deep) 152 | } 153 | 154 | test ("axpy single precision") { 155 | val alpha = 2 156 | val x = DenseTensor[Float](Array[Float](0.5f, 1f, 1.5f, 2f, 2.5f, 3f), Array(6)) 157 | val y = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(6)) 158 | DenseTensor.axpy(alpha, x, y) 159 | assert(y.copyData().deep == Array[Float](2, 4, 6, 8, 10, 12).deep) 160 | } 161 | 162 | test ("dgemm double precision") { 163 | val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 164 | val b = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(3, 2)) 165 | val c = DenseTensor[Double](Array(2, 2)) 166 | DenseTensor.gemm(1.0, a, b, 0.0, c) 167 | 
assert(c.copyData().deep == Array[Double](22, 28, 49, 64).deep) 168 | DenseTensor.gemm(0.5, a, b, 0.5, c) 169 | assert(c.copyData().deep == Array[Double](22, 28, 49, 64).deep) 170 | } 171 | 172 | test ("dgemm double precision transpose") { 173 | val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(3, 2)) 174 | val b = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(3, 2)) 175 | val c = DenseTensor[Double](Array(2, 2)) 176 | DenseTensor.gemm(1.0, a.transpose, b, 0.0, c) 177 | assert(c.copyData().deep == Array[Double](14, 32, 32, 77).deep) 178 | } 179 | 180 | test ("dgemm single precision") { 181 | val a = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 182 | val b = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(3, 2)) 183 | val c = DenseTensor[Float](Array(2, 2)) 184 | DenseTensor.gemm(1.0f, a, b, 0.0f, c) 185 | assert(c.copyData().deep == Array[Float](22, 28, 49, 64).deep) 186 | DenseTensor.gemm(0.5f, a, b, 0.5f, c) 187 | assert(c.copyData().deep == Array[Float](22, 28, 49, 64).deep) 188 | } 189 | 190 | test ("dgemm single precision transpose") { 191 | val a = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(3, 2)) 192 | val b = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(3, 2)) 193 | val c = DenseTensor[Float](Array(2, 2)) 194 | DenseTensor.gemm(1.0f, a.transpose, b, 0.0f, c) 195 | assert(c.copyData().deep == Array[Double](14, 32, 32, 77).deep) 196 | } 197 | 198 | test("gemv double precision") { 199 | val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 200 | val x = DenseTensor[Double](Array[Double](1, 2, 3), Array(3)) 201 | val y = DenseTensor[Double](Array[Double](2, 2), Array(2)) 202 | DenseTensor.gemv(1.0, a, x, 0.5, y) 203 | assert(y.copyData().deep == Array[Double](23, 29).deep) 204 | } 205 | 206 | test("gemv single precision") { 207 | val a = DenseTensor[Float](Array[Float](1, 2, 3, 4, 5, 6), Array(2, 3)) 208 | val x = DenseTensor[Float](Array[Float](1, 2, 3), Array(3)) 209 | val y = DenseTensor[Float](Array[Float](2, 2), Array(2)) 210 | DenseTensor.gemv(1.0f, a, x, 0.5f, y) 211 | assert(y.copyData().deep == Array[Float](23, 29).deep) 212 | } 213 | 214 | test ("elementwise product") { 215 | val a = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 216 | val b = DenseTensor[Double](Array[Double](1, 2, 3, 4, 5, 6), Array(2, 3)) 217 | DenseTensor.elementwiseProduct(a, b) 218 | assert(a.copyData().deep == Array[Double](1, 4, 9, 16, 25, 36).deep) 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/tensor/NumericBoxingTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.tensor 19 | 20 | class NumericBoxingTest[@specialized(Double, Float) T : Numeric] { 21 | lazy val numOps = implicitly[Numeric[T]] 22 | def plus(x: T, y: T): T = numOps.plus(x, y) 23 | } 24 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/tensor/TypedClassTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package scaladl.tensor 19 | 20 | import scaladl.tensor.Math.NumberLike 21 | 22 | object Math { 23 | trait NumberLike[@specialized (Double, Int) T] { 24 | def plus(x: T, y: T): T 25 | } 26 | object NumberLike { 27 | implicit object NumberLikeDouble extends NumberLike[Double] { 28 | def plus(x: Double, y: Double): Double = x + y 29 | } 30 | implicit object NumberLikeInt extends NumberLike[Int] { 31 | def plus(x: Int, y: Int): Int = x + y 32 | } 33 | } 34 | } 35 | object Statistics { 36 | import Math.NumberLike 37 | def plus[@specialized (Double, Int) T](x: T, y: T)(implicit ev: NumberLike[T]): T = 38 | ev.plus(x, y) 39 | def plusDouble(x: Double, y: Double): Double = x + y 40 | } 41 | 42 | class My[@specialized (Double, Int) T](implicit ev: NumberLike[T]) { 43 | def plus(x: T, y: T): T = ev.plus(x, y) 44 | } 45 | 46 | object TypedClassTest { 47 | def main(args: Array[String]): Unit = { 48 | // Statistics.plus(2.0, 2.0) 49 | // Statistics.plusDouble(2.0, 2.0) 50 | val m = new My[Double]() 51 | m.plus(2.0, 2.0) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/scala/scaladl/util/SparkTestContext.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package scaladl.util 19 | 20 | import org.apache.log4j.{Level, Logger} 21 | import org.apache.spark.sql.SparkSession 22 | import org.apache.spark.SparkContext 23 | import org.scalatest.{BeforeAndAfterAll, Suite} 24 | 25 | trait SparkTestContext extends BeforeAndAfterAll { self: Suite => 26 | @transient var spark: SparkSession = _ 27 | @transient var sc: SparkContext = _ 28 | @transient var checkpointDir: String = _ 29 | 30 | override def beforeAll() { 31 | super.beforeAll() 32 | spark = SparkSession.builder 33 | .master("local[2]") 34 | .appName("MLlibUnitTest") 35 | .config("spark.sql.warehouse.dir", "warehouse-temp") 36 | .getOrCreate() 37 | sc = spark.sparkContext 38 | Logger.getLogger("org").setLevel(Level.WARN) 39 | } 40 | 41 | override def afterAll() { 42 | try { 43 | SparkSession.clearActiveSession() 44 | if (spark != null) { 45 | spark.stop() 46 | } 47 | spark = null 48 | } finally { 49 | super.afterAll() 50 | } 51 | } 52 | } 53 | 54 | --------------------------------------------------------------------------------