├── project ├── build.properties └── Dependencies.scala ├── .scalafmt.conf ├── .gitignore ├── src ├── test │ └── scala │ │ ├── examples │ │ └── LinearRegSmokeTest.scala │ │ └── ml │ │ └── network │ │ ├── MaxPoolTest.scala │ │ └── Conv2DTest.scala └── main │ └── scala │ ├── ml │ ├── math │ │ └── geneircMath.scala │ ├── network │ │ ├── api.scala │ │ ├── metrics.scala │ │ ├── initialization.scala │ │ ├── loss.scala │ │ ├── activators.scala │ │ ├── optimizers.scala │ │ ├── ann.scala │ │ └── layers.scala │ ├── preprocessing │ │ ├── TextLoader.scala │ │ └── encoders.scala │ ├── transformation.scala │ └── tensors │ │ ├── ndarray.scala │ │ ├── tensor.scala │ │ └── ops.scala │ └── examples │ ├── fileUtils.scala │ ├── mnistCommon.scala │ ├── CNN.scala │ ├── MNIST.scala │ ├── multipleRegression.scala │ ├── linearRegression.scala │ └── MnistLoader.scala ├── .github └── workflows │ └── scala.yml ├── README.md └── plots.sc /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.5.5 2 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.0.0-RC3" 2 | runner.dialect = scala3 3 | -------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | object Dependencies { 4 | lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.2.10" 5 | } 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ammonite 2 | .metals 3 | .vscode 4 | .dotty-* 5 | .idea 6 | target 7 | .bsp 8 | metals.sbt 9 | .bloop 10 | .DS_Store 11 | notebooks 12 | metrics 13 | images -------------------------------------------------------------------------------- /src/test/scala/examples/LinearRegSmokeTest.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import org.scalatest.flatspec.AnyFlatSpec 4 | 5 | class LinearRegSmokeTest extends AnyFlatSpec { 6 | it should "run linear regression example without a fail" in { 7 | lrTest() 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /.github/workflows/scala.yml: -------------------------------------------------------------------------------- 1 | name: Scala CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up JDK 11 17 | uses: actions/setup-java@v1 18 | with: 19 | java-version: 11.0.x 20 | - name: Run tests 21 | run: sbt test 22 | -------------------------------------------------------------------------------- /src/main/scala/ml/math/geneircMath.scala: -------------------------------------------------------------------------------- 1 | package ml.math 2 | 3 | import scala.reflect.ClassTag 4 | import ml.transformation.castFromTo 5 | 6 | object generic: 7 | def exp[T: ClassTag](v: T)(using n: Numeric[T]): T = 8 | castFromTo[Double, T](math.exp(n.toDouble(v))) 9 | 10 | def pow[T: ClassTag](x: T, y: T)(using n: Numeric[T]): T = 11 | castFromTo[Double, T](math.pow(n.toDouble(x), n.toDouble(y))) 12 | 13 | def max[T: ClassTag](x: T, y: T)(using 
n: Numeric[T]): T = 14 | castFromTo[Double, T](math.max(n.toDouble(x), n.toDouble(y))) 15 | 16 | def log[T: ClassTag](x: T)(using n: Numeric[T]): T = 17 | castFromTo[Double, T](math.log(n.toDouble(x))) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning in Scala from scratch 2 | 3 | An example mini-library implementation for Artificial Neural Network training and inference. 4 | 5 | __Implementation includes:__ 6 | 7 | 1. Mini-library for a subset of Tensor calculus 8 | 1. Mini-library for data preparation 9 | 1. A DSL for Neural Network creation, including layers 10 | 1. Pluggable weight optimizers and initializers 11 | 1. Pluggable implementations of activation and loss functions 12 | 1. Pluggable training metric calculation 13 | 14 | 15 | See examples: 16 | - [Classification with Artificial Neural Network](src/main/scala/examples/multipleRegression.scala) 17 | - [Linear Regression](src/main/scala/examples/linearRegression.scala) 18 | - [MNIST Images Classification](src/main/scala/examples/MNIST.scala) -------------------------------------------------------------------------------- /src/main/scala/examples/fileUtils.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.network.Model 4 | 5 | import scala.util.Using 6 | 7 | import java.io.File 8 | import java.io.PrintWriter 9 | import java.nio.file.Path 10 | 11 | def store(filename: String, header: String, data: List[List[String]]) = 12 | Using.resource(new PrintWriter(new File(filename))) { w => 13 | w.write(header) 14 | data.foreach { row => 15 | w.write(s"\n${row.mkString(",")}") 16 | } 17 | } 18 | 19 | def storeMetrics[T](model: Model[T], path: Path) = 20 | val values = model.metricValues 21 | val header = s"epoch,loss,${values.map(_._1.name).mkString(",")}" 22 | val acc = values.headOption.map(_._2).getOrElse(Nil) 23 | val lrData = model.history.losses.zip(acc).zipWithIndex.map { 24 | case ((loss, acc), epoch) => List(epoch.toString, loss.toString, acc.toString) 25 | } 26 | store(path.toString, header, lrData) -------------------------------------------------------------------------------- /src/main/scala/examples/mnistCommon.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.network.api._ 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | import ml.preprocessing._ 7 | 8 | import scala.reflect.ClassTag 9 | 10 | object mnistCommon: 11 | val imageDir = "images" 12 | 13 | def accuracyMnist[T: ClassTag: Ordering](using n: Numeric[T]) = new Metric[T]: 14 | val name = "accuracy" 15 | 16 | def matches(actual: Tensor[T], predicted: Tensor[T]): Int = 17 | val predictedArgMax = predicted.argMax 18 | actual.argMax.equalRows(predictedArgMax) 19 | 20 | def prepareData[T: ClassTag](x: Tensor[T], y: Tensor[T])(using n: Fractional[T]) = 21 | val encoder = OneHotEncoder( 22 | classes = (0 to 9).map(i => (n.fromInt(i), n.fromInt(i))).toMap 23 | ) 24 | val max = n.fromInt(255) 25 | val xData = x.map(v => n.div(v, max)) // normalize to [0,1] range 26 | val yData = encoder.transform(y.as1D) 27 | (xData, yData) -------------------------------------------------------------------------------- /src/main/scala/ml/network/api.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | object api: 4 | final type 
StandardGD = ml.network.StandardGD 5 | final type Adam = ml.network.Adam 6 | final type Stub = ml.network.Stub 7 | 8 | final type MetricValues[T] = ml.network.MetricValues[T] 9 | 10 | final type RandomUniform = ml.network.RandomUniform 11 | final type HeNormal = ml.network.HeNormal 12 | 13 | export ml.network.Dense 14 | export ml.network.Conv2D 15 | export ml.network.MaxPool 16 | export ml.network.Flatten2D 17 | export ml.network.Layer 18 | export ml.network.optimizers.given 19 | export ml.network.Optimizable 20 | export ml.network.Sequential 21 | export ml.network.Model 22 | export ml.network.GradientClippingApi.* 23 | export ml.network.GradientClippingApi 24 | export ml.network.GradientClipping 25 | export ml.network.MetricApi.* 26 | export ml.network.Metric 27 | export ml.network.LossApi.* 28 | export ml.network.ActivationFuncApi.* 29 | export ml.network.ParamsInitializer 30 | export ml.network.inits.given 31 | export ml.network.inits -------------------------------------------------------------------------------- /src/main/scala/ml/network/metrics.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | 6 | import scala.reflect.ClassTag 7 | 8 | trait Metric[T]: 9 | val name: String 10 | 11 | // number of matched predictions versus actual labels 12 | def matches( 13 | actual: Tensor[T], 14 | predicted: Tensor[T] 15 | ): Int 16 | 17 | def average(count: Int, matches: Int): Double = 18 | matches.toDouble / count 19 | 20 | def apply(actual: Tensor[T], predicted: Tensor[T]): Double = 21 | val correct = matches(actual, predicted) 22 | average(actual.length, correct) 23 | 24 | object MetricApi: 25 | def predictedToBinary[T](v: T)(using n: Numeric[T]): T = 26 | if n.toDouble(v) > 0.5 then n.one else n.zero 27 | 28 | def accuracyBinaryClassification[T: ClassTag: Fractional] = new Metric[T]: 29 | val name = "accuracy" 30 | 31 | def matches( 32 | actual: Tensor[T], 33 | predicted: Tensor[T] 34 | ): Int = 35 | val predictedBinary = predicted.map(predictedToBinary) 36 | actual.equalRows(predictedBinary) -------------------------------------------------------------------------------- /src/main/scala/ml/preprocessing/TextLoader.scala: -------------------------------------------------------------------------------- 1 | package ml.preprocessing 2 | 3 | import ml.transformation.{castTo, castArray} 4 | import ml.tensors.api._ 5 | import ml.tensors.ops.{T, col, slice} 6 | 7 | import java.io.File 8 | import java.nio.file.Path 9 | import scala.io.Source 10 | import scala.reflect.ClassTag 11 | import scala.util.Using 12 | 13 | object TextLoader: 14 | val defaultDelimiter: String = "," 15 | 16 | def apply(rows: String*): TextLoader = 17 | TextLoader(data = rows.toArray.map(_.split(defaultDelimiter).toArray)) 18 | 19 | case class TextLoader( 20 | path: Path = new File("data.csv").toPath, 21 | header: Boolean = true, 22 | delimiter: String = TextLoader.defaultDelimiter, 23 | data: Array[Array[String]] = Array.empty[Array[String]] 24 | ): 25 | 26 | def load(): TextLoader = copy( 27 | data = Using.resource(Source.fromFile(path.toFile)) { s => 28 | val lines = s.getLines() 29 | (if header && lines.nonEmpty then lines.toArray.tail else lines.toArray) 30 | .map(_.split(delimiter)) 31 | } 32 | ) 33 | 34 | def cols[T: ClassTag](from: Int, to: Int): Tensor2D[T] = 35 | castTo[T](data.slice(None, Some((from, to)))) 36 | 37 | def col[T: ClassTag](i: Int): Tensor1D[T] = 38 | val col = data.col(i) 39 | 
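// Illustrative note (not part of the original source): castArray below parses each string
// cell according to the runtime class of T, so e.g. TextLoader("1.5,a", "2.5,b").col[Double](0)
// would yield Tensor1D(1.5, 2.5), while col[String](1) keeps the raw string values.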
Tensor1D(castArray[T](col)) 40 | 41 | def cols[T: ClassTag](i: Int): Tensor[T] = col(i).T -------------------------------------------------------------------------------- /src/main/scala/ml/network/initialization.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops.as4D 6 | 7 | import scala.reflect.ClassTag 8 | import java.util.Random 9 | 10 | trait ParamsInitializer[A, B]: 11 | 12 | def weights(rows: Int, cols: Int): Tensor2D[A] 13 | 14 | def biases(length: Int): Tensor1D[A] 15 | 16 | def weights4D( 17 | shape: List[Int] 18 | )(using c: ClassTag[A], n: Numeric[A]): Tensor4D[A] = 19 | val tensors :: cubes :: rows :: cols :: Nil = shape 20 | (0 until tensors) 21 | .map(_ => (0 until cubes).toArray.map(_ => weights(rows, cols))) 22 | .toArray 23 | .as4D 24 | 25 | // support Initializers 26 | type RandomUniform 27 | type HeNormal 28 | 29 | object inits: 30 | def zeros[T: ClassTag](length: Int)(using n: Numeric[T]): Tensor1D[T] = 31 | Tensor1D(Array.fill(length)(n.zero)) 32 | 33 | given [T: Numeric: ClassTag]: ParamsInitializer[T, RandomUniform] with 34 | 35 | def gen: T = 36 | castFromTo[Double, T](math.random().toDouble + 0.001d) 37 | 38 | override def weights(rows: Int, cols: Int): Tensor2D[T] = 39 | Tensor2D(Array.fill(rows)(Array.fill[T](cols)(gen))) 40 | 41 | override def biases(length: Int): Tensor1D[T] = 42 | zeros(length) 43 | 44 | given [T: ClassTag: Numeric]: ParamsInitializer[T, HeNormal] with 45 | val rnd = new Random() 46 | 47 | def gen(lenght: Int): T = 48 | castFromTo[Double, T] { 49 | val v = rnd.nextGaussian + 0.001d 50 | v * math.sqrt(2d / lenght.toDouble) 51 | } 52 | 53 | override def weights(rows: Int, cols: Int): Tensor2D[T] = 54 | Tensor2D(Array.fill(rows)(Array.fill[T](cols)(gen(rows)))) 55 | 56 | override def biases(length: Int): Tensor1D[T] = 57 | zeros(length) 58 | -------------------------------------------------------------------------------- /src/main/scala/examples/CNN.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import mnistCommon._ 4 | import ml.transformation.castFromTo 5 | import ml.tensors.api._ 6 | import ml.tensors.ops._ 7 | import ml.network.api._ 8 | import ml.network.api.given 9 | import ml.network.api.inits.given 10 | import ml.preprocessing._ 11 | 12 | import java.nio.file.Path 13 | import java.util.Random 14 | import scala.reflect.ClassTag 15 | 16 | @main 17 | def CNN() = 18 | type Precision = Float 19 | val accuracy = accuracyMnist[Precision] 20 | 21 | def clipByNorm[T: Fractional: ClassTag](norm: T) = new GradientClipping[T]: 22 | def apply(t: Tensor[T]) = 23 | t match 24 | case (Tensor4D(data)) => 25 | data.map(_.map(_.as2D.clipByNorm(norm).as2D)).as4D // clipping within matrix 26 | case _ => 27 | t.clipByNorm(norm) 28 | 29 | val cnn = Sequential[Precision, Adam, HeNormal]( 30 | crossEntropy, 31 | learningRate = 0.0015, 32 | metrics = List(accuracy), 33 | batchSize = 128, 34 | gradientClipping = clipByNorm(10.0), 35 | printStepTps = true 36 | ) 37 | .add(Conv2D(relu, 8, kernel = (5, 5))) 38 | .add(MaxPool(strides = (2, 2), window = (4, 4), padding = false)) 39 | .add(Flatten2D()) 40 | .add(Dense(relu, 64)) 41 | .add(Dense(softmax, 10)) 42 | 43 | val dataset = MnistLoader.loadData[Precision](imageDir, flat = false) 44 | val (xTrain, yTrain) = prepareData(dataset.trainImage, dataset.trainLabels) 45 | 46 | val start = 
System.currentTimeMillis() 47 | val model = cnn.train(xTrain, yTrain, epochs = 5, shuffle = true) 48 | println(s"training time: ${(System.currentTimeMillis() - start) / 1000f} in sec") 49 | 50 | val (xTest, yTest) = prepareData(dataset.testImages, dataset.testLabels) 51 | val testPredicted = model(xTest) 52 | val value = accuracy(yTest, testPredicted) 53 | println(s"test accuracy = $value") -------------------------------------------------------------------------------- /src/main/scala/examples/MNIST.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import mnistCommon._ 4 | import ml.transformation.{castTo, castFromTo} 5 | import ml.tensors.api._ 6 | import ml.tensors.ops._ 7 | import ml.network.api._ 8 | import ml.network.api.given 9 | import ml.network.inits.given 10 | import ml.preprocessing._ 11 | 12 | import java.nio.file.Path 13 | import scala.reflect.ClassTag 14 | 15 | @main def MNIST() = 16 | type Precision = Float 17 | val dataset = MnistLoader.loadData[Precision](imageDir) 18 | val accuracy = accuracyMnist[Precision] 19 | 20 | val ann = Sequential[Precision, Adam, HeNormal]( 21 | crossEntropy, 22 | learningRate = 0.001, 23 | metrics = List(accuracy), 24 | batchSize = 128, 25 | gradientClipping = clipByValue(5.0), 26 | printStepTps = true 27 | ) 28 | .add(Dense(relu, 50)) 29 | .add(Dense(softmax, 10)) 30 | 31 | val (xTrain, yTrain) = prepareData(dataset.trainImage, dataset.trainLabels) 32 | val start = System.currentTimeMillis() 33 | val model = ann.train(xTrain, yTrain, epochs = 15, shuffle = true) 34 | println(s"training time: ${(System.currentTimeMillis() - start) / 1000f} in sec") 35 | 36 | val (xTest, yTest) = prepareData(dataset.testImages, dataset.testLabels) 37 | val testPredicted = model(xTest) 38 | val value = accuracy(yTest, testPredicted) 39 | println(s"test accuracy = $value") 40 | 41 | // Single Test 42 | val singleTestImage = dataset.testImages.as2D.data.head 43 | val imageMap = singleTestImage.grouped(28) 44 | .map(_.map(s => f"${s.toInt}%4s").mkString).mkString("\n") 45 | println(imageMap) 46 | val label = dataset.testLabels.as1D.data.head 47 | val predicted = model(singleTestImage.as2D).argMax.as0D.data 48 | println(s"predicted = $predicted") 49 | 50 | assert(label == predicted, 51 | s"Predicted label is not equal to expected '$label' label, but was '$predicted'") 52 | 53 | storeMetrics(model, Path.of("metrics/mnist.csv")) -------------------------------------------------------------------------------- /src/main/scala/ml/transformation.scala: -------------------------------------------------------------------------------- 1 | package ml 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | import TypeHelper._ 6 | import scala.reflect.ClassTag 7 | 8 | object TypeHelper: 9 | val String_ = classOf[String] 10 | val Int_ = classOf[Int] 11 | val Long_ = classOf[Long] 12 | val Float_ = classOf[Float] 13 | val Double_ = classOf[Double] 14 | 15 | // set of functions to parse and cast in the same time 16 | object transformation: 17 | 18 | def castTo[T: ClassTag]( 19 | data: Array[Array[String]] 20 | ): Tensor2D[T] = 21 | val transformed = data.map(castArray[T]) 22 | Tensor2D[T](transformed) 23 | 24 | def castArray[T: ClassTag](data: Array[String]): Array[T] = 25 | summon[ClassTag[T]].runtimeClass match 26 | case Float_ => data.map(_.toFloat.asInstanceOf[T]) 27 | case String_ => data.map(_.asInstanceOf[T]) 28 | case Double_ => data.map(_.toDouble.asInstanceOf[T]) 29 | 30 | private def 
castFromIntTo[T: ClassTag](data: Int): T = 31 | summon[ClassTag[T]].runtimeClass match 32 | case Float_ => data.toFloat.asInstanceOf[T] 33 | case String_ => data.toString.asInstanceOf[T] 34 | case Double_ => data.toDouble.asInstanceOf[T] 35 | case Int_ => data.asInstanceOf[T] 36 | 37 | def castFromTo[A, B](a: A)(using ev1: ClassTag[A], ev2: ClassTag[B]): B = 38 | (ev1.runtimeClass, ev2.runtimeClass) match 39 | case (Float_, String_) => a.toString.asInstanceOf[B] 40 | case (Float_, Double_) => a.asInstanceOf[Float].toDouble.asInstanceOf[B] 41 | case (Float_, Float_) => a.asInstanceOf[B] 42 | case (String_, Float_) => a.toString.toFloat.asInstanceOf[B] 43 | case (String_, Double_) => a.toString.toDouble.asInstanceOf[B] 44 | case (Double_, String_) => a.toString.asInstanceOf[B] 45 | case (Double_, Float_) => a.asInstanceOf[Double].toFloat.asInstanceOf[B] 46 | case (Double_, Double_) => a.asInstanceOf[B] 47 | case (Int_, _) => castFromIntTo[B](a.asInstanceOf[Int]) -------------------------------------------------------------------------------- /src/main/scala/ml/tensors/ndarray.scala: -------------------------------------------------------------------------------- 1 | package ml.tensors.api 2 | 3 | import scala.reflect.ClassTag 4 | 5 | case class NDArray[T: ClassTag: Numeric](data: Array[Any], shape: List[Int]): 6 | private def printArray(a: Array[Any], level: Int = 1): Array[String] = 7 | a.map { e => 8 | e match 9 | case ar: Array[Any] => 10 | val start = s"\n${" " * level}[" 11 | val body = printArray(ar, level + 1).mkString(",") 12 | val end = if body.last == ']' then s"\n${" " * level}]" else "]" 13 | s"$start$body$end" 14 | case _ => s"$e" 15 | } 16 | 17 | override def toString: String = 18 | val str = printArray(data).mkString(", ") 19 | "[" + str + (if str.last == ']' then "\n" else "") + "]" 20 | 21 | 22 | object NDArray: 23 | def init[T](shape: List[Int], v: T)(using n: Numeric[T]): Array[Any] = 24 | shape match 25 | case Nil => Array(v) 26 | case h :: Nil => Array.fill(h)(v) 27 | case h :: t => Array.fill(h)(init(t, v)) 28 | 29 | def zeros[T: ClassTag](shape: Int*)(using n: Numeric[T]): NDArray[T] = 30 | NDArray[T](init(shape.toList, n.zero), shape.toList) 31 | 32 | def ones[T: ClassTag](shape: Int*)(using n: Numeric[T]): NDArray[T] = 33 | NDArray[T](init(shape.toList, n.one), shape.toList) 34 | 35 | extension [T: ClassTag: Numeric](a: NDArray[T]) 36 | protected def reshape(shape: Int*): NDArray[T] = 37 | val newShape = shape.toList 38 | assert(a.shape.reduce(_ * _) == newShape.reduce(_ * _), s"Current shape ${a.shape} does not fit new shape = $shape") 39 | 40 | @annotation.tailrec 41 | def group(ar: Array[Any], shape: List[Int]): Array[Any] = 42 | shape match 43 | case h :: Nil => ar.grouped(h).toArray 44 | case h :: t => group(ar.grouped(h).toArray, t) 45 | case _ => ar 46 | 47 | NDArray[T](group(a.data, newShape.reverse), newShape) 48 | 49 | 50 | @main 51 | def test = 52 | val ones = NDArray.ones[Int](16) 53 | println(ones) 54 | println(ones.reshape(2, 2, 2, 2)) 55 | 56 | NDArray[Int](Array(Array(Array(1))), List(1,1,1)) -------------------------------------------------------------------------------- /src/main/scala/ml/network/loss.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | import ml.math.generic._ 7 | 8 | import scala.math.Numeric.Implicits._ 9 | import scala.reflect.ClassTag 10 | 11 | trait Loss[T]: 12 | def 
apply( 13 | actual: Tensor[T], 14 | predicted: Tensor[T] 15 | ): T 16 | 17 | object LossApi: 18 | private def calcMetric[T: Numeric: ClassTag]( 19 | t1: Tensor[T], t2: Tensor[T], f: (T, T) => T 20 | ) = 21 | (t1, t2) match 22 | case (Tensor1D(a), Tensor1D(b)) => 23 | val sum = (t1, t2).map2(f).sum //TODO: sum and then apply f ? 24 | (sum, t1.length) 25 | case (t @ Tensor2D(a), Tensor2D(b)) => 26 | val (rows, cols) = t.shape2D 27 | val sum = (t1, t2).map2(f).sum //TODO: sum and then apply f ? 28 | (sum, rows * cols) 29 | case (Tensor0D(a), Tensor0D(b)) => 30 | (f(a, b), 1) 31 | case _ => 32 | sys.error(s"Both tensors must be the same shape: ${t1.shape} != ${t2.shape}") 33 | 34 | private def mean[T: Numeric: ClassTag](count: Int, sum: T): Double = 35 | castFromTo[T, Double](sum) / count 36 | 37 | def meanSquareError[T: ClassTag](using n: Numeric[T]) = new Loss[T]: 38 | def calc(a: T, b: T): T = 39 | pow(a - b, n.fromInt(2)) 40 | 41 | override def apply( 42 | actual: Tensor[T], 43 | predicted: Tensor[T] 44 | ): T = 45 | val (sumScore, count) = calcMetric(actual, predicted, calc) 46 | val meanSumScore = mean(count, sumScore) 47 | castFromTo(meanSumScore) 48 | 49 | def crossEntropy[T: ClassTag: Numeric] = new Loss[T]: 50 | def calc(y: T, yHat: T): T = 51 | y * log(yHat) 52 | 53 | override def apply( 54 | actual: Tensor[T], 55 | predicted: Tensor[T] 56 | ): T = 57 | val (sumScore, count) = calcMetric(actual, predicted, calc) 58 | val meanSumScore = mean(count, sumScore) 59 | castFromTo(-meanSumScore) 60 | 61 | def binaryCrossEntropy[T: ClassTag](using n: Numeric[T]) = new Loss[T]: 62 | def calc(y: T, yHat: T): T = 63 | y * log(yHat) + (n.one - y) * log(n.one - yHat) 64 | 65 | override def apply( 66 | actual: Tensor[T], 67 | predicted: Tensor[T] 68 | ): T = 69 | val (sumScore, count) = calcMetric(actual, predicted, calc) 70 | val meanSumScore = mean(count, sumScore) 71 | castFromTo(-meanSumScore) -------------------------------------------------------------------------------- /src/main/scala/examples/multipleRegression.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.preprocessing._ 4 | import ml.transformation.castTo 5 | import ml.tensors.api._ 6 | import ml.tensors.ops._ 7 | import ml.network.api._ 8 | import ml.network.api.given 9 | import ml.network.inits.given 10 | 11 | import java.nio.file.Path 12 | import java.io.{File, PrintWriter} 13 | import scala.reflect.ClassTag 14 | import scala.util.Using 15 | 16 | @main def multipleRegression() = 17 | 18 | def createEncoders[T: Numeric: ClassTag]( 19 | data: Tensor2D[String] 20 | ): Tensor2D[String] => Tensor2D[T] = 21 | val hotEncoder = OneHotEncoder[String, T]().fit(data.col(1)) // geography 22 | val encoder = LabelEncoder[String]().fit(data.col(2)) // gender 23 | 24 | val hot = t => hotEncoder.transform(t, 1) 25 | val label = t => encoder.transform(t, 2) 26 | val typeTransform = (t: Tensor2D[String]) => castTo[T](t.data) 27 | 28 | label andThen hot andThen typeTransform 29 | 30 | val accuracy = accuracyBinaryClassification[Double] 31 | 32 | val ann = Sequential[Double, Adam, RandomUniform]( 33 | binaryCrossEntropy, 34 | learningRate = 0.002d, 35 | metrics = List(accuracy), 36 | batchSize = 64, 37 | gradientClipping = clipByValue(5.0d) 38 | ) 39 | .add(Dense(leakyRelu, 6)) 40 | .add(Dense(leakyRelu, 6)) 41 | .add(Dense(sigmoid)) 42 | 43 | val dataLoader = TextLoader(Path.of("data", "Churn_Modelling.csv")).load() 44 | val data = dataLoader.cols[String](3, -1) 45 | 46 | 
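// What the composed encoder does to one raw row (illustrative values, assuming the
// geography classes France/Germany/Spain and gender labels Female=0, Male=1 after sorting):
//   ["600", "France", "Male", "40", ...]        raw columns from the CSV slice
//   ["600", "France", "1", "40", ...]           gender label-encoded (column 2)
//   ["600", "1", "0", "0", "1", "40", ...]      geography one-hot encoded in place (column 1)
//   [600.0, 1.0, 0.0, 0.0, 1.0, 40.0, ...]      every cell cast to Double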
val encoders = createEncoders[Double](data) 47 | val numericData = encoders(data) 48 | val scaler = StandardScaler[Double]().fit(numericData) 49 | 50 | val prepareData = (t: Tensor2D[String]) => { 51 | val numericData = encoders(t) 52 | scaler.transform(numericData) 53 | } 54 | 55 | val x = prepareData(data) 56 | val y = dataLoader.cols[Double](-1) 57 | 58 | val ((xTrain, xTest), (yTrain, yTest)) = (x, y).split(0.2f) 59 | 60 | val start = System.currentTimeMillis() 61 | val model = ann.train(xTrain, yTrain, epochs = 100) 62 | println(s"training time: ${(System.currentTimeMillis() - start) / 1000f} in sec") 63 | 64 | // Single test 65 | val example = TextLoader( 66 | "n/a,n/a,n/a,600,France,Male,40,3,60000,2,1,1,50000,n/a" 67 | ).cols[String](3, -1) 68 | val testExample = prepareData(example) 69 | val yHat = model(testExample) 70 | val exited = predictedToBinary(yHat.as0D.data) == 1d 71 | println(s"Exited customer? $exited") 72 | 73 | // Test Dataset 74 | val testPredicted = model(xTest) 75 | val value = accuracy(yTest, testPredicted) 76 | println(s"test accuracy = $value") 77 | 78 | storeMetrics(model, Path.of("metrics/ann.csv")) -------------------------------------------------------------------------------- /src/test/scala/ml/network/MaxPoolTest.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | 6 | import scala.reflect.ClassTag 7 | import scala.math.Numeric.Implicits._ 8 | 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | import org.scalatest.matchers.should.Matchers 11 | 12 | class MaxPoolTest extends AnyFlatSpec with Matchers { 13 | val image = Tensor4D( 14 | Array( 15 | Array( 16 | Array( 17 | Array(1d, 2, 3, 3), 18 | Array(2d, 3, 4, 3), 19 | Array(5d, 6, 7, 3) 20 | ) 21 | ) 22 | ) 23 | ) 24 | 25 | it should "do forward and backward propagation without padding" in { 26 | val unpadded = Array( 27 | Array( 28 | Array( 29 | Array(3d, 4, 4), 30 | Array(6d, 7, 7) 31 | ) 32 | ) 33 | ) 34 | 35 | // FORWARD 36 | // given 37 | val noPaddingLayer = MaxPool[Double](padding = false).init(image.shape) 38 | // when 39 | val noPaddingAct = noPaddingLayer(image) 40 | // then 41 | val z = noPaddingAct.z.as4D 42 | z.shape should ===(noPaddingLayer.shape) 43 | z.data should ===(unpadded) 44 | 45 | val unpaddedDelta = Array( 46 | Array( 47 | Array( 48 | Array(1d, 2, 3), 49 | Array(7d, 1, 2) 50 | ) 51 | ) 52 | ) 53 | 54 | val Gradient(unpaddedNextDelta, _, _) = 55 | noPaddingLayer.backward(noPaddingAct, unpaddedDelta.as4D, None) 56 | 57 | unpaddedNextDelta.as4D.data should ===( 58 | Array( 59 | Array( 60 | Array( 61 | Array(0d, 0, 0, 0), 62 | Array(0d, 1, 3, 0), 63 | Array(0d, 7, 2, 0) 64 | ) 65 | ) 66 | ) 67 | ) 68 | } 69 | 70 | it should "do forward propagation with padding" in { 71 | // given 72 | val padded = Array( 73 | Array( 74 | Array( 75 | Array(3d, 4, 4, 3), 76 | Array(6d, 7, 7, 3), 77 | Array(6d, 7, 7, 3) 78 | ) 79 | ) 80 | ) 81 | val paddedLayer = MaxPool[Double](padding = true).init(image.shape) 82 | // when 83 | val a = paddedLayer(image) 84 | 85 | // then 86 | a.z.shape should ===(paddedLayer.shape) 87 | a.z.as4D.data should ===(padded) 88 | 89 | // BACKWARD 90 | // given 91 | val delta = Array( 92 | Array( 93 | Array( 94 | Array(1d, 2, 3, 1), 95 | Array(7d, 1, 2, 1), 96 | Array(1d, 1, 2, 1) 97 | ) 98 | ) 99 | ) 100 | // when 101 | val Gradient(nextDelta, w, b) = paddedLayer.backward(a, delta.as4D, None) 102 | 103 | //then 104 | nextDelta.as4D.shape4D should 
===(a.x.as4D.shape4D) 105 | w should ===(None) 106 | b should ===(None) 107 | 108 | withClue(s"$nextDelta") { 109 | nextDelta.as4D.data should ===( 110 | Array( 111 | Array( 112 | Array( 113 | Array(0d, 0, 0, 1), 114 | Array(0d, 1, 3, 1), 115 | Array(0d, 1, 2, 1) 116 | ) 117 | ) 118 | ) 119 | ) 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/main/scala/ml/network/activators.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | import ml.math.generic._ 7 | 8 | import math.Ordering.Implicits.infixOrderingOps 9 | import math.Fractional.Implicits.infixFractionalOps 10 | import scala.reflect.ClassTag 11 | 12 | trait ActivationFunc[T]: 13 | val name: String 14 | def apply(x: Tensor[T]): Tensor[T] 15 | def derivative(x: Tensor[T]): Tensor[T] 16 | 17 | object ActivationFuncApi: 18 | def relu[T: ClassTag](using n: Numeric[T]) = new ActivationFunc[T]: 19 | 20 | override def apply(x: Tensor[T]): Tensor[T] = 21 | x.map(t => if t < n.zero then n.zero else t) 22 | 23 | override def derivative(x: Tensor[T]): Tensor[T] = 24 | x.map(t => if t < n.zero then n.zero else n.one) 25 | 26 | override val name = "relu" 27 | 28 | def leakyRelu[T: ClassTag](using n: Numeric[T]) = new ActivationFunc[T]: 29 | val scaler = castFromTo[Double, T](0.01) 30 | 31 | override def apply(x: Tensor[T]): Tensor[T] = 32 | x.map(t => if t < n.zero then n.times(scaler, t) else t) 33 | 34 | override def derivative(x: Tensor[T]): Tensor[T] = 35 | x.map(t => if t < n.zero then scaler else n.one) 36 | 37 | override val name = "leakyRelu" 38 | 39 | def sigmoid[T: ClassTag](using n: Fractional[T]) = new ActivationFunc[T]: 40 | 41 | override def apply(x: Tensor[T]): Tensor[T] = 42 | x.map(t => n.one / (n.one + exp(-t))) 43 | 44 | override def derivative(x: Tensor[T]): Tensor[T] = 45 | x.map(t => exp(-t) / pow(n.one + exp(-t), n.fromInt(2))) 46 | 47 | override val name = "sigmoid" 48 | 49 | def softmax[T: ClassTag: Ordering](using n: Fractional[T]) = new ActivationFunc[T]: 50 | val toleration = castFromTo[Double, T](0.9E-6d) 51 | 52 | override def apply(x: Tensor[T]): Tensor[T] = 53 | val applied = x.mapRow { row => 54 | val max = row.max 55 | val expNorm = row.map(v => exp(v - max)) 56 | val sum = expNorm.sum 57 | expNorm.map(_ / sum) 58 | } 59 | 60 | val appliedSum = applied.sumCols.map( 61 | v => 62 | if v.abs - toleration > n.one 63 | then v 64 | else n.one 65 | ) 66 | val totalSum = appliedSum.sumRows.as1D.data.head 67 | assert(totalSum == x.length, 68 | s"Softmax distribution sum is not equal to 1 at some activation, but\n${appliedSum}") 69 | applied 70 | 71 | override def derivative(x: Tensor[T]): Tensor[T] = 72 | val sm = apply(x) 73 | sm.multiply(n.one - sm) 74 | 75 | // override def derivative(x: Tensor[T]): Tensor[T] = 76 | // println(s"derivative x:\n$x") 77 | // val sm = apply(x) 78 | // sm.mapRow { row => 79 | // val t = Tensor1D(row) 80 | // val dxDs = t.diag - (t * t) 81 | // dxDs.sumRows.as1D.data 82 | // } 83 | 84 | override val name = "softmax" 85 | 86 | def linear[T] = new ActivationFunc[T]: 87 | override def apply(x: Tensor[T]): Tensor[T] = x 88 | override def derivative(x: Tensor[T]): Tensor[T] = x 89 | override val name = "linear" -------------------------------------------------------------------------------- /src/main/scala/examples/linearRegression.scala: 
-------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.network.api._ 4 | import ml.network.api.given 5 | import ml.tensors.api._ 6 | import ml.tensors.ops._ 7 | import ml.network.inits.given 8 | 9 | import scala.reflect.ClassTag 10 | import scala.math.Numeric.Implicits._ 11 | import scala.collection.mutable.ArrayBuffer 12 | import scala.util.{Random, Using} 13 | import scala.collection.parallel.CollectionConverters._ 14 | 15 | import java.io.{File,PrintWriter} 16 | 17 | @main 18 | def linearRegression() = lrTest(false) 19 | 20 | def lrTest(fromTest: Boolean = true) = 21 | type Precision = Float 22 | val random = new Random() 23 | val weight = random.nextFloat() 24 | val bias = random.nextFloat() 25 | 26 | def batch(batchSize: Int): (ArrayBuffer[Precision], ArrayBuffer[Precision]) = 27 | val inputs = ArrayBuffer.empty[Precision] 28 | val outputs = ArrayBuffer.empty[Precision] 29 | def noise = 0//random.nextFloat / 5 30 | (0 until batchSize).foldLeft(inputs, outputs) { case ((x, y), _) => 31 | val rnd = random.nextFloat 32 | x += rnd + noise 33 | y += bias + weight * rnd + noise 34 | (x, y) 35 | } 36 | 37 | val optimizer = "adam" 38 | 39 | val ann = Sequential[Precision, Adam, RandomUniform]( 40 | meanSquareError, 41 | learningRate = 0.001, 42 | batchSize = 16, 43 | gradientClipping = _.clipByNorm(10.0) 44 | ).add(Dense()) 45 | 46 | val (xBatch, yBatch) = batch(10000) 47 | val x = Tensor1D(xBatch.toArray) 48 | val y = Tensor1D(yBatch.toArray) 49 | val ((xTrain, xTest), (yTrain, yTest)) = (x, y).split(0.2) 50 | 51 | val model = ann.train(xTrain.T, yTrain.T, epochs = 100) 52 | 53 | println(s"current weight: ${model.layers}") 54 | println(s"true weight: $weight") 55 | println(s"true bias: $bias") 56 | 57 | // Test Dataset 58 | val testPredicted = model(xTest.T) 59 | val value = meanSquareError[Precision].apply(yTest.T, testPredicted) 60 | println(s"test meanSquareError = $value") 61 | 62 | if !fromTest then 63 | ////////////////////////////////////////// 64 | // Store all posible data for plotting /// 65 | ////////////////////////////////////////// 66 | 67 | // datapoints 68 | val dataPoints = xBatch.zip(yBatch).map((x, y) => List(x.toString, y.toString)) 69 | store("metrics/datapoints.csv", "x,y", dataPoints.toList) 70 | 71 | //Store loss metric into CSV file 72 | val lossData = model.history.losses.zipWithIndex.map((l,i) => List(i.toString, l.toString)) 73 | store("metrics/lr.csv", "epoch,loss", lossData) 74 | 75 | //gradient 76 | val gradientData = model.history.layers.zip(model.history.losses) 77 | .map { (layers, loss) => 78 | layers.headOption.collect { 79 | case l @ Dense(_, _, _ , Some(w), Some(b), _) => 80 | List(w.as1D.data.head.toString, b.as1D.data.head.toString) 81 | case _ => Nil 82 | }.toList.flatten :+ loss.toString 83 | } 84 | 85 | store(s"metrics/$optimizer-gradient.csv", "w,b,loss", gradientData) 86 | 87 | // loss surface 88 | val weights = for (i <- 0 until 100) yield i/100.0f 89 | val biases = weights 90 | 91 | println("Calculating loss surface") 92 | val losses = weights.par.map { w => 93 | val wT = w.as2D 94 | biases.foldLeft(ArrayBuffer.empty[Precision]) { (acc, b) => 95 | val loss = ann.loss(x.T, y.T, List(Dense(w = Some(wT), b = Some(b.as1D)))) 96 | acc :+ loss 97 | } 98 | } 99 | println("Done calculating loss surface.") 100 | 101 | val metricsData = weights.zip(biases).zip(losses) 102 | .map { case ((w, b), l) => List(w.toString, b.toString, l.mkString("\"", ",", "\"")) } 103 | 104 | 
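// Each row written below has the form w,b,"l_0,l_1,...,l_99": the third column keeps the
// whole loss row across all bias values as a single quoted, comma-separated field, which
// plots.sc later re-splits with `_.split(",")` to rebuild the loss-surface grid.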
store(s"metrics/$optimizer-lr-surface.csv", "w,b,l", metricsData.toList) -------------------------------------------------------------------------------- /src/main/scala/ml/preprocessing/encoders.scala: -------------------------------------------------------------------------------- 1 | package ml.preprocessing 2 | 3 | import Encoder._ 4 | import ml.transformation.castFromTo 5 | import ml.tensors.api._ 6 | import ml.tensors.ops.T 7 | 8 | import scala.collection.mutable.ArrayBuffer 9 | import scala.reflect.ClassTag 10 | 11 | object Encoder: 12 | def toClasses[T: ClassTag: Ordering, U: ClassTag]( 13 | samples: Tensor1D[T] 14 | ): Map[T, U] = 15 | samples.data.distinct.sorted.zipWithIndex.toMap.view 16 | .mapValues(castFromTo[Int, U]) 17 | .toMap 18 | 19 | case class LabelEncoder[T: ClassTag: Ordering]( 20 | classes: Map[T, T] = Map.empty[T, T] 21 | ): 22 | def fit(samples: Tensor1D[T]): LabelEncoder[T] = 23 | LabelEncoder(toClasses[T, T](samples)) 24 | 25 | def transform(t: Tensor2D[T], col: Int): Tensor2D[T] = 26 | val data = t.data.map( 27 | _.zipWithIndex.map { (d, i) => 28 | if i == col then classes.getOrElse(d, d) else d 29 | } 30 | ) 31 | Tensor2D(data) 32 | 33 | /** 34 | * T - key type 35 | * U - numeric value type for the key type 36 | */ 37 | case class OneHotEncoder[ 38 | T: Ordering: ClassTag, 39 | U: Ordering: ClassTag 40 | ]( 41 | classes: Map[T, U] = Map.empty[T, U], 42 | notFound: Int = -1 43 | )(using n: Numeric[U]): 44 | def fit(samples: Tensor1D[T]): OneHotEncoder[T, U] = 45 | OneHotEncoder[T, U](toClasses[T, U](samples)) 46 | 47 | def transform(t: Tensor1D[T]): Tensor2D[T] = 48 | Tensor2D(t.data.map(encode)) 49 | 50 | private def encode(v: T) = 51 | val zero = castFromTo[Int, T](0) 52 | val array = Array.fill[T](classes.size)(zero) 53 | val pos = classes.get(v) 54 | pos match 55 | case Some(p) => 56 | array(n.toInt(p)) = castFromTo[U, T](n.one) 57 | case None => 58 | array(0) = castFromTo[U, T](n.fromInt(notFound)) 59 | array 60 | 61 | def transform(t: Tensor2D[T], col: Int): Tensor2D[T] = 62 | val data = t.data.map { row => 63 | row.zipWithIndex 64 | .foldLeft(ArrayBuffer.empty[T]) { case (acc, (v, i)) => 65 | if i == col then acc ++ encode(v) 66 | else acc :+ v 67 | } 68 | .toArray[T] 69 | } 70 | Tensor2D(data) 71 | 72 | case class ColumnStat(mean: Double, stdDev: Double) 73 | 74 | case class StandardScaler[T: Numeric: ClassTag]( 75 | stats: Array[ColumnStat] = Array.empty 76 | ): 77 | def fit(samples: Tensor[T]): StandardScaler[T] = 78 | samples match 79 | case Tensor1D(data) => 80 | StandardScaler(Array(fitColumn(data))) 81 | case t @ Tensor2D(_) => 82 | StandardScaler(t.T.data.map(fitColumn)) 83 | case Tensor0D(_) => StandardScaler() 84 | case _ => 85 | sys.error(s"Not implemented for: $samples") 86 | 87 | private def fitColumn(data: Array[T]) = 88 | val nums = data.map(castFromTo[T, Double]) 89 | val mean = nums.sum / data.length 90 | val stdDev = math.sqrt( 91 | nums.map(n => math.pow(n - mean, 2)).sum / (data.length - 1) 92 | ) 93 | ColumnStat(mean, stdDev) 94 | 95 | def transform(t: Tensor[T]): Tensor[T] = 96 | t match 97 | case Tensor1D(data) => 98 | val stat = stats.headOption.getOrElse( 99 | sys.error(s"There is no statistics for $t") 100 | ) 101 | val res = data.map(n => 102 | castFromTo[Double, T](scale(castFromTo[T, Double](n), stat)) 103 | ) 104 | Tensor1D(res) 105 | case t2 @ Tensor2D(data) => 106 | val (rows, cols) = t2.shape2D 107 | val res = Array.ofDim[T](rows, cols) 108 | 109 | for i <- 0 until rows do 110 | for j <- 0 until cols do 111 | val stat = 
stats(j) 112 | val n = castFromTo[T, Double](data(i)(j)) 113 | res(i)(j) = castFromTo[Double, T](scale(n, stat)) 114 | Tensor2D(res) 115 | case Tensor0D(_) => t // scaling is not applicable for scalar tensor 116 | case _ => sys.error(s"Not implemented for: $t") 117 | 118 | private def scale(n: Double, stat: ColumnStat): Double = 119 | (n - stat.mean) / stat.stdDev 120 | -------------------------------------------------------------------------------- /src/main/scala/ml/tensors/tensor.scala: -------------------------------------------------------------------------------- 1 | package ml.tensors.api 2 | 3 | import scala.math.Numeric.Implicits._ 4 | import scala.reflect.ClassTag 5 | import math.Ordering.Implicits.infixOrderingOps 6 | import Tensor._ 7 | 8 | sealed trait Tensor[T]: 9 | 10 | def shape: List[Int] 11 | 12 | def length: Int = shape.headOption.getOrElse(0) 13 | 14 | def shape(axis: Int): List[Int] = shape.drop(axis) 15 | 16 | def meta[T: ClassTag] = 17 | s"shape: ${shape.mkString("x")}, ${getClass.getSimpleName}[${summon[ClassTag[T]]}]" 18 | 19 | object Tensor: 20 | def printArray(a: Array[_], meta: String): String = 21 | def loop(a: Array[_], level: Int = 1): Array[String] = 22 | a.map { e => 23 | e match 24 | case ar: Array[_] => 25 | val start = s"\n${" " * level}[" 26 | val body = loop(ar, level + 1).mkString(",") 27 | val end = if body.last == ']' then s"\n${" " * level}]" else "]" 28 | s"$start$body$end" 29 | case _ => s"$e" 30 | } 31 | val str = loop(a).mkString(", ") 32 | s"$meta:\n[" + str + (if str.last == ']' then "\n" else "") + "]" 33 | 34 | def of[T:ClassTag](size: Int, size2: Int): Tensor2D[T] = 35 | Tensor2D[T](Array.fill(size)(of[T](size2).data)) 36 | 37 | def of[T: ClassTag](size: Int): Tensor1D[T] = 38 | Tensor1D[T](Array.ofDim[T](size)) 39 | 40 | case class Tensor0D[T: ClassTag](data: T) extends Tensor[T]: 41 | override val length: Int = 1 42 | 43 | override val shape: List[Int] = length :: Nil 44 | 45 | override def toString: String = 46 | s"$meta:\n" + data + "\n" 47 | 48 | case class Tensor1D[T: ClassTag](data: Array[T]) extends Tensor[T]: 49 | override def shape: List[Int] = List(data.length) 50 | 51 | override def toString: String = 52 | printArray(data, meta) 53 | 54 | override def length: Int = data.length 55 | 56 | object Tensor1D: 57 | def apply[T: ClassTag](data: T*): Tensor1D[T] = 58 | Tensor1D[T](data.toArray) 59 | 60 | case class Tensor2D[T: ClassTag](data: Array[Array[T]]) extends Tensor[T]: 61 | override def shape: List[Int] = 62 | shape2D.toList 63 | 64 | def shape2D: (Int, Int) = 65 | (data.length, data.headOption.map(_.length).getOrElse(0)) 66 | 67 | private val meta = 68 | s"shape: ${shape.mkString("x")}, Tensor2D[${summon[ClassTag[T]]}]" 69 | 70 | override def toString: String = 71 | printArray(data, meta) 72 | 73 | override def length: Int = data.length 74 | 75 | override def shape(axis: Int) = 76 | shape.drop(axis) 77 | 78 | object Tensor2D: 79 | def apply[T: ClassTag](rows: Array[T]*): Tensor2D[T] = 80 | Tensor2D[T](rows.toArray) 81 | 82 | case class Tensor3D[T: ClassTag](data: Array[Array[Array[T]]]) extends Tensor[T]: 83 | def shape3D: (Int, Int, Int) = 84 | val rows = data.headOption.map(_.length).getOrElse(0) 85 | val cols = data.headOption.flatMap(_.headOption.map(_.length)).getOrElse(0) 86 | (data.length, rows, cols) 87 | 88 | override def shape: List[Int] = 89 | shape3D.toList 90 | 91 | override def length: Int = data.length 92 | 93 | override def toString: String = 94 | printArray(data, meta) 95 | 96 | object Tensor3D: 97 | def 
apply[T: ClassTag](matrices: Tensor2D[T]*): Tensor3D[T] = 98 | Tensor3D(matrices.toArray.map(_.data)) 99 | 100 | case class Tensor4D[T: ClassTag](data: Array[Array[Array[Array[T]]]]) extends Tensor[T]: 101 | def shape4D: (Int, Int, Int, Int) = 102 | val cubes = data.headOption.map(_.length).getOrElse(0) 103 | val rows = data.headOption.flatMap(_.headOption.map(_.length)).getOrElse(0) 104 | val cols = for { 105 | cube <- data.headOption 106 | row <- cube.headOption 107 | col <- row.headOption 108 | } yield col.length 109 | 110 | (data.length, cubes, rows, cols.getOrElse(0)) 111 | 112 | override def shape: List[Int] = 113 | shape4D.toList 114 | 115 | override def length: Int = data.length 116 | 117 | override def toString: String = 118 | printArray(data, meta) 119 | 120 | object Tensor4D: 121 | def apply[T: ClassTag](cubes: Tensor3D[T]*): Tensor4D[T] = 122 | Tensor4D(cubes.toArray.map(t => t.data)) -------------------------------------------------------------------------------- /src/main/scala/examples/MnistLoader.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | 6 | import scala.collection.mutable.ArrayBuffer 7 | import scala.reflect.ClassTag 8 | import scala.util.Using 9 | 10 | import java.io.{DataInputStream, BufferedInputStream, FileInputStream} 11 | import java.nio.file.{Files, Path} 12 | import java.util.zip.GZIPInputStream 13 | 14 | // data to be taken from http://yann.lecun.com/exdb/mnist/ or at GitHub somewhere 15 | object MnistLoader: 16 | val trainImagesFilename = "train-images-idx3-ubyte.gz" 17 | val trainLabelsFilename = "train-labels-idx1-ubyte.gz" 18 | val testImagesFilename = "t10k-images-idx3-ubyte.gz" 19 | val testLabelsFilename = "t10k-labels-idx1-ubyte.gz" 20 | 21 | val LabelFileMagicNumber = 2049 22 | val ImageFileMagicNumber = 2051 23 | 24 | case class MnistDataset[T: Numeric]( 25 | trainImage: Tensor[T], 26 | trainLabels: Tensor[T], 27 | testImages: Tensor[T], 28 | testLabels: Tensor[T] 29 | ) 30 | 31 | case class LoaderCfg(samples: Int, numberOfImages: Int, nRows: Int, nCols: Int) 32 | 33 | def loadData[T: Numeric: ClassTag]( 34 | mnistDir: String, 35 | samples: Int = 60_000, 36 | flat: Boolean = true 37 | ): MnistDataset[T] = 38 | val (trainImages, trainLabels) = loadDataset( 39 | Path.of(mnistDir, trainImagesFilename), 40 | Path.of(mnistDir, trainLabelsFilename), 41 | samples, 42 | flat 43 | ) 44 | val (testImages, testLabels) = loadDataset( 45 | Path.of(mnistDir, testImagesFilename), 46 | Path.of(mnistDir, testLabelsFilename), 47 | samples, 48 | flat 49 | ) 50 | MnistDataset(trainImages, trainLabels, testImages, testLabels) 51 | 52 | private def loadDataset[T: ClassTag]( 53 | images: Path, 54 | labels: Path, 55 | samples: Int, 56 | flat: Boolean 57 | )(using n: Numeric[T]): (Tensor[T], Tensor[T]) = 58 | Using.resource( 59 | new DataInputStream( 60 | new GZIPInputStream(Files.newInputStream(images)) 61 | ) 62 | ) { imageInputStream => 63 | val magicNumber = imageInputStream.readInt() 64 | assert( 65 | magicNumber == ImageFileMagicNumber, 66 | s"Image file magic number is incorrect, expected $ImageFileMagicNumber, but was $magicNumber" 67 | ) 68 | 69 | val numberOfImages = imageInputStream.readInt() 70 | val (nRows, nCols) = 71 | (imageInputStream.readInt(), imageInputStream.readInt()) 72 | 73 | val labelsTensor = Using.resource( 74 | new DataInputStream( 75 | new GZIPInputStream(Files.newInputStream(labels)) 76 | ) 77 | ) { labelInputStream => 
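// Label file layout, as consumed below (standard MNIST IDX format): a 4-byte magic number
// (expected 2049), a 4-byte label count, then one byte per label holding the digit 0-9.
// The image file read above follows the same scheme with magic number 2051 plus the row
// and column counts before the raw pixel bytes.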
78 | val labelMagicNumber = labelInputStream.readInt() 79 | assert( 80 | labelMagicNumber == LabelFileMagicNumber, 81 | s"Label file magic number is incorrect, expected $LabelFileMagicNumber, but was $labelMagicNumber" 82 | ) 83 | 84 | val numberOfLabels = labelInputStream.readInt() 85 | 86 | assert( 87 | numberOfImages == numberOfLabels, 88 | s"Number of images is not equal to number of labels, $numberOfImages != $numberOfLabels" 89 | ) 90 | 91 | labelInputStream.readAllBytes 92 | .map(l => n.fromInt(l)) 93 | .take(samples) 94 | .as1D 95 | } 96 | 97 | val cfg = LoaderCfg(samples, numberOfImages, nRows, nCols) 98 | val images = 99 | if flat then readAsVector(cfg, imageInputStream) 100 | else readAsMatrix(cfg, imageInputStream) 101 | 102 | (images, labelsTensor) 103 | } 104 | 105 | private def readAsVector[T: ClassTag](cfg: LoaderCfg, imageInputStream: DataInputStream)(using n: Numeric[T]) = 106 | val images = ArrayBuffer.empty[Array[T]] 107 | val singleImageSize = cfg.nRows * cfg.nCols 108 | 109 | for _ <- (0 until cfg.numberOfImages) do 110 | images += readNBytes(singleImageSize, imageInputStream) 111 | 112 | images.toArray.take(cfg.samples).as2D 113 | 114 | private def readAsMatrix[T: ClassTag](cfg: LoaderCfg, imageInputStream: DataInputStream)(using n: Numeric[T]) = 115 | val images = ArrayBuffer.empty[Array[Array[Array[T]]]] 116 | 117 | for _ <- (0 until cfg.numberOfImages) do 118 | val image = ArrayBuffer.empty[Array[T]] 119 | for _ <- (0 until cfg.nRows) do 120 | image += readNBytes(cfg.nCols, imageInputStream) 121 | images += Array(image.toArray) 122 | 123 | images.toArray.take(cfg.samples).as4D 124 | 125 | private def readNBytes[T: ClassTag](count: Int, is: DataInputStream)(using n: Numeric[T]) = 126 | (0 until count).map(_ => n.fromInt(is.readUnsignedByte())).toArray -------------------------------------------------------------------------------- /plots.sc: -------------------------------------------------------------------------------- 1 | // scala 2.13.4 2 | 3 | import $ivy. `org.carbonateresearch::picta:0.1.1` 4 | import org.carbonateresearch.picta.render.Html.initNotebook // required to initialize jupyter notebook mode 5 | initNotebook() // stops standard output 6 | 7 | import org.carbonateresearch.picta.IO._ 8 | import org.carbonateresearch.picta._ 9 | 10 | val metricsDir = getWorkingDirectory + "/../metrics" 11 | val filepath = metricsDir + "/lr.csv" 12 | val data = readCSV(filepath) 13 | val epochs = data("epoch").map(_.toInt) 14 | val losses = data("loss").map(_.toDouble) 15 | 16 | val series = XY(epochs, losses).asType(SCATTER).drawStyle(LINES) 17 | val chart = Chart().addSeries(series.setName("Learning loss")).setTitle("Linear Regression Example: Loss vs. Epoch") 18 | chart.plotInline 19 | 20 | val filepath = s"$metricsDir/datapoints.csv" 21 | val data = readCSV(filepath) 22 | val x = data("x").map(_.toDouble) 23 | val y = data("y").map(_.toDouble) 24 | val w = 0.6911375732835148 25 | val b = 0.7800122918798839 26 | def model(x: Double) = w * x + b 27 | val m1 = Array(-0.1d, 1.3d) 28 | val m2 = List(model(m1(0)), model(m1(1))) 29 | 30 | //val marker = Marker() setSymbol SQUARE_OPEN setColor "red" 31 | val inputData = XY(x, y) asType SCATTER setName "Input Data" drawStyle MARKERS //setMarker marker 32 | val modelData = XY(m1.toList, m2) asType SCATTER setName "Model" // drawStyle MARKERS 33 | val chart = Chart() addSeries(inputData, modelData) setTitle("Data points vs. 
Trained model") 34 | 35 | chart.plotInline 36 | 37 | val filepath = metricsDir + "/ann.csv" 38 | val data = readCSV(filepath) 39 | val epochs = data("epoch").map(_.toInt) 40 | val losses = data("loss").map(_.toDouble) 41 | val accuracy = data("accuracy").map(_.toDouble) 42 | val maxAccuracy = accuracy.max 43 | val normAccuracy = accuracy.map(_ / maxAccuracy) 44 | val maxLoss = losses.max 45 | val normLoss = losses.map(_ / maxLoss) 46 | 47 | val loss = XY(epochs, losses) asType SCATTER drawStyle LINES 48 | val acc = XY(epochs, accuracy) asType SCATTER drawStyle LINES 49 | val lossChart = 50 | Chart() addSeries( 51 | loss.setName("Learning loss"), 52 | acc.setName("Training Accuracy") 53 | ) setTitle "ANN Example: Loss vs. Accuracy vs. Epoch" 54 | lossChart.plotInline 55 | 56 | val data = readCSV(s"$metricsDir/adam-lr-surface.csv") 57 | val w = data("w").map(_.toDouble).reverse 58 | val b = data("b").map(_.toDouble).reverse 59 | val loss = data("l").map(_.split(",").map(_.toDouble)).reverse 60 | val surface = XYZ(x=w, y=b, z=loss.flatten, n=loss(0).length).asType(SURFACE).setColorBar("Loss", RIGHT_SIDE) 61 | 62 | val gradientData = readCSV(s"$metricsDir/adam-gradient.csv") 63 | val gw = gradientData("w").map(_.toDouble).reverse 64 | val gb = gradientData("b").map(_.toDouble).reverse 65 | val gLoss = gradientData("loss").map(_.toDouble).reverse 66 | val gradient = XYZ(x=gw, y=gb, z=gLoss).asType(SCATTER3D).setName("Gradient").drawLinesMarkers 67 | 68 | val surfaceChart = Chart() 69 | .addSeries(gradient,surface) 70 | .setTitle("Loss Function Surface") 71 | .setLegend(x = 0.5, y = -0.5, orientation = HORIZONTAL, xanchor = AUTO, yanchor = AUTO) 72 | .addAxes(Axis(X, title = "w"), Axis(Y, title = "b"), Axis(Z, title = "loss")) 73 | surfaceChart.plotInline 74 | 75 | import org.carbonateresearch.picta.options.Marker 76 | import org.carbonateresearch.picta.SymbolShape._ 77 | import org.carbonateresearch.picta.options.AUTO 78 | 79 | val contour = XYZ(x=w, y=b, z=loss.flatten, n=loss(0).length).asType(CONTOUR) 80 | val adamdMarker = Marker().setColor("rgb(200,0,0)").setSymbol(SQUARE_OPEN) 81 | val adamGradient = XY(x=gw, y=gb).asType(SCATTER).setName("Adam Gradient").setMarker(adamdMarker) 82 | .drawLinesMarkers 83 | 84 | val simpledGradientData = readCSV(s"$metricsDir/simplegd-gradient.csv") 85 | val simpleGw = simpledGradientData("w").map(_.toDouble).reverse 86 | val simpleGb = simpledGradientData("b").map(_.toDouble).reverse 87 | val simpleGdmarker = Marker().setColor("rgb(0,200,0)").setSymbol(SQUARE_OPEN) 88 | val simpleGDGradient = XY(x=simpleGw, y=simpleGb).asType(SCATTER) 89 | .setName("Classic Gradient Descent").setMarker(simpleGdmarker).drawLinesMarkers 90 | 91 | val simpleGDAnimation = 92 | (0 to simpleGw.length-1) 93 | .map(x => XY(simpleGw.take(x+1), simpleGb.take(x+1)) setName "Classic Gradient Descent") 94 | .toList 95 | 96 | val adamAnimation = 97 | (0 to gw.length-1) 98 | .map(x => XY(gw.take(x+1), gb.take(x+1)) setName "Adam") 99 | .toList 100 | 101 | val animatedChart = 102 | Chart(animated = true, transition_duration=simpleGw.length, animate_multiple_series = true) 103 | //.addSeries(contour) 104 | .addSeries(simpleGDAnimation) 105 | .addSeries(adamAnimation) 106 | .setTitle("Gradient Trace") 107 | .setLegend(x = 0.5, y = -0.5, orientation = HORIZONTAL, xanchor = AUTO, yanchor = AUTO) 108 | .addAxes(Axis(X, title = "w"), Axis(Y, title = "b"), Axis(Z, title = "loss")) 109 | 110 | animatedChart.plotInline 111 | 112 | val countourChart = Chart() 113 | .addSeries(contour, adamGradient, 
simpleGDGradient) 114 | .setTitle("Loss Contour") 115 | .setLegend(x = 0.5, y = -0.5, orientation = HORIZONTAL, xanchor = AUTO, yanchor = AUTO) 116 | .addAxes(Axis(X, title = "w"), Axis(Y, title = "b"), Axis(Z, title = "loss")) 117 | 118 | countourChart.plotInline 119 | -------------------------------------------------------------------------------- /src/main/scala/ml/network/optimizers.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | import ml.transformation.castFromTo 6 | 7 | import scala.collection.mutable.ListBuffer 8 | import scala.reflect.ClassTag 9 | import scala.math.Numeric.Implicits._ 10 | 11 | // suported Optimizers 12 | type Adam 13 | type StandardGD 14 | type Stub 15 | 16 | trait Optimizer[U]: 17 | 18 | def updateWeights[T: ClassTag]( 19 | layers: List[Layer[T]], 20 | activations: List[Activation[T]], 21 | error: Tensor[T], 22 | cfg: OptimizerCfg[T], 23 | timestep: Int 24 | )(using n: Fractional[T]): List[Layer[T]] 25 | 26 | def init[T: ClassTag: Numeric](w: Tensor[T], b: Tensor[T]): Option[OptimizerParams[T]] = None 27 | 28 | object optimizers: 29 | given Optimizer[Stub] with 30 | override def updateWeights[T: ClassTag]( 31 | layers: List[Layer[T]], 32 | activations: List[Activation[T]], 33 | error: Tensor[T], 34 | c: OptimizerCfg[T], 35 | timestep: Int 36 | )(using n: Fractional[T]): List[Layer[T]] = layers 37 | 38 | given Optimizer[Adam] with 39 | 40 | override def init[T: ClassTag: Numeric](w: Tensor[T], b: Tensor[T]): Option[OptimizerParams[T]] = 41 | Some(AdamState[T](w.zero, w.zero, b.zero, b.zero)) 42 | 43 | override def updateWeights[T: ClassTag]( 44 | layers: List[Layer[T]], 45 | activations: List[Activation[T]], 46 | error: Tensor[T], 47 | c: OptimizerCfg[T], 48 | timestep: Int 49 | )(using n: Fractional[T]): List[Layer[T]] = 50 | val AdamCfg(b1, b2, eps) = c.adam 51 | 52 | def correction(gradient: Tensor[T], m: Tensor[T], v: Tensor[T]) = 53 | val mt = (b1 * m) + ((n.one - b1) * gradient) 54 | val vt = (b2 * v) + ((n.one - b2) * gradient.sqr) 55 | val mHat = mt :/ (n.one - (b1 ** timestep)) 56 | val vHat = vt :/ (n.one - (b2 ** timestep)) 57 | 58 | val corr = c.learningRate * (mHat / (vHat.sqrt + eps)) 59 | (corr, mt, vt) 60 | 61 | layers 62 | .zip(activations) 63 | .foldRight( 64 | ListBuffer.empty[Layer[T]], 65 | error, 66 | None: Option[Tensor[T]] 67 | ) { 68 | case ( 69 | (layer, a), 70 | (ls, prevDelta, prevWeight) 71 | ) => 72 | val Gradient(delta, wOpt, bOpt) = layer.backward(a, prevDelta, prevWeight) 73 | val (updated, weight) = (layer, wOpt, bOpt) match 74 | case (o: Optimizable[T], Some(w), Some(b)) => 75 | // Adam 76 | o.optimizerParams match 77 | case Some(AdamState(mw, vw, mb, vb)) => 78 | val wGradient = c.clip(w) 79 | val bGradient = c.clip(b).sumRows 80 | val batchSize = n.fromInt(a.x.length) 81 | val (corrW, weightM, weightV) = correction(wGradient :/ batchSize, mw, vw) 82 | val (corrB, biasM, biasV) = correction(bGradient :/ batchSize, mb, vb) 83 | val adamState = Some(AdamState(weightM, weightV, biasM, biasV)) 84 | (o.update(corrW, corrB, adamState), o.w) 85 | case _ => 86 | (layer, None) // does nothing if Adam state is not set 87 | case _ => 88 | (layer, None) // does nothing if one of the params is empty 89 | (updated +: ls, delta, weight) 90 | } 91 | ._1.toList 92 | 93 | given Optimizer[StandardGD] with 94 | 95 | override def updateWeights[T: ClassTag]( 96 | layers: List[Layer[T]], 97 | activations: List[Activation[T]], 
98 | error: Tensor[T], 99 | cfg: OptimizerCfg[T], 100 | timestep: Int 101 | )(using n: Fractional[T]): List[Layer[T]] = 102 | layers 103 | .zip(activations) 104 | .foldRight( 105 | ListBuffer.empty[Layer[T]], 106 | error, 107 | None: Option[Tensor[T]] 108 | ) { 109 | case ( 110 | (layer, a), 111 | (ls, prevDelta, prevWeight) 112 | ) => 113 | val Gradient(delta, w, b) = layer.backward(a, prevDelta, prevWeight) 114 | val (updated, weight) = (layer, w, b) match 115 | case (o: Optimizable[T], Some(w), Some(b)) => 116 | val batchSize = n.fromInt(a.x.length) 117 | val wGradient = cfg.clip(w) :/ batchSize 118 | val bGradient = cfg.clip(b).sumRows :/ batchSize 119 | val corrW = cfg.learningRate * wGradient 120 | val corrB = cfg.learningRate * bGradient 121 | (o.update(corrW, corrB), o.w) 122 | case _ => 123 | (layer, None) 124 | (updated +: ls, delta, weight) 125 | } 126 | ._1.toList 127 | 128 | case class OptimizerCfg[T: ClassTag: Fractional]( 129 | learningRate: T, 130 | clip: GradientClipping[T] = GradientClippingApi.noClipping[T], 131 | adam: AdamCfg[T] 132 | ) 133 | 134 | sealed trait OptimizerParams[T] 135 | 136 | case class AdamState[T](mw: Tensor[T], vw: Tensor[T], mb: Tensor[T], vb: Tensor[T]) extends OptimizerParams[T] 137 | 138 | case class AdamCfg[T: ClassTag](b1: T, b2: T, eps: T) 139 | 140 | object AdamCfg: 141 | 142 | def default[T: ClassTag]: AdamCfg[T] = 143 | AdamCfg[T]( 144 | castFromTo[Double, T](0.9), 145 | castFromTo[Double, T](0.999), 146 | castFromTo[Double, T](10E-8) 147 | ) 148 | 149 | trait GradientClipping[T] extends (Tensor[T] => Tensor[T]) 150 | 151 | object GradientClippingApi: 152 | def clipByValue[T: Fractional: ClassTag](value: T): GradientClipping[T] = 153 | _.clipInRange(-value, value) 154 | 155 | def clipByNorm[T: Fractional: ClassTag](value: T): GradientClipping[T] = 156 | _.clipByNorm(value) 157 | 158 | inline def noClipping[T]: GradientClipping[T] = t => t -------------------------------------------------------------------------------- /src/test/scala/ml/network/Conv2DTest.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | import optimizers.given_Optimizer_Adam as adam 7 | 8 | import scala.reflect.ClassTag 9 | import scala.math.Numeric.Implicits._ 10 | import scala.collection.mutable.ListBuffer 11 | 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | class Conv2DTest extends AnyFlatSpec with Matchers { 16 | def testActivation[T: ClassTag](using n: Numeric[T]) = new ActivationFunc[T]: 17 | override def apply(x: Tensor[T]): Tensor[T] = x.map(_ + n.one) 18 | override def derivative(x: Tensor[T]): Tensor[T] = apply(x) 19 | override val name = "test" 20 | 21 | given testInit[T: ClassTag](using n: Numeric[T]): ParamsInitializer[T, RandomUniform] with 22 | def gen: T = n.one 23 | 24 | override def weights(rows: Int, cols: Int): Tensor2D[T] = 25 | Tensor2D(Array.fill(rows)(Array.fill[T](cols)(gen))) 26 | 27 | override def biases(length: Int): Tensor1D[T] = 28 | inits.zeros(length) 29 | 30 | val image1 = Tensor3D(Array( 31 | Array( 32 | Array(1d, 2, 3, 3), 33 | Array(2d, 3, 4, 3), 34 | Array(5d, 6, 7, 3) 35 | ), 36 | Array( 37 | Array(1d, 2, 3, 1), 38 | Array(2d, 3, 4, 1), 39 | Array(5d, 6, 7, 1) 40 | ), 41 | Array( 42 | Array(1d, 2, 3, 2), 43 | Array(2d, 3, 4, 2), 44 | Array(5d, 6, 7, 2) 45 | ) 46 | )) 47 | 48 | val image2 = Tensor3D(Array( 49 | 
Array( 50 | Array(1d, 2, 3, 1), 51 | Array(2d, 3, 4, 1), 52 | Array(5d, 6, 7, 1) 53 | ), 54 | Array( 55 | Array(1d, 2, 3, 2), 56 | Array(2d, 3, 4, 2), 57 | Array(5d, 6, 7, 2) 58 | ), 59 | Array( 60 | Array(1d, 2, 3, 3), 61 | Array(2d, 3, 4, 3), 62 | Array(5d, 6, 7, 3) 63 | ) 64 | )) 65 | 66 | val images = Tensor4D(image1, image2) 67 | 68 | it should "do forward propagation" in { 69 | // given 70 | val inputShape = images.shape4D 71 | 72 | val layer = Conv2D[Double]( 73 | f = testActivation, 74 | filterCount = 3, 75 | kernel = (2, 2), 76 | strides = (1, 1) 77 | ).init(inputShape.toList, testInit, adam) 78 | 79 | // when 80 | val activation = layer(images) 81 | val (imageCount, inputChannels, width, height) = inputShape 82 | 83 | // then 84 | activation.z.shape should ===(List(imageCount, layer.filterCount, 2, 3)) 85 | 86 | val w = layer.w.getOrElse(fail("Weight must not be empty")) 87 | val b = layer.b.getOrElse(fail("Bias must not be empty")) 88 | 89 | def applyFilter[T: ClassTag: Numeric](filter: Array[Array[T]], window: Array[Array[T]]): T = 90 | filter.zip(window).map((a, b) => a.zip(b).map(_ * _).sum).sum 91 | 92 | def filterChannel[T: Numeric: ClassTag](channel: Array[Array[T]], filter: Array[Array[T]]) = 93 | val rows = ListBuffer[Array[T]]() 94 | 95 | for i <- 0 to width - layer.kernel._1 by layer.strides._1 do 96 | val img = channel.drop(i).take(layer.kernel._1) 97 | val row = ListBuffer.empty[T] 98 | 99 | for j <- 0 to height - layer.kernel._1 by layer.strides._2 do 100 | val window = img.map(_.drop(j).take(layer.kernel._2)) 101 | row += applyFilter(filter, window) 102 | 103 | rows += row.toArray 104 | rows.toArray 105 | 106 | def filterChannels[T: ClassTag : Numeric](filters: Tensor4D[T], images: Tensor4D[T]) = 107 | images.data.map { image => 108 | filters.data.map { channels => 109 | channels.zip(image).map { (fc, ic) => 110 | filterChannel(ic, fc).as2D 111 | }.reduce(_ + _) 112 | } 113 | } 114 | 115 | val expectedActivities = filterChannels(w.as4D, images).as4D 116 | 117 | val layerActivity = activation.z.as4D.data 118 | layerActivity.zip(expectedActivities.data).foreach { (actual, expected) => 119 | actual should ===(expected) 120 | } 121 | 122 | val expectedActivation = layer.f(expectedActivities) 123 | activation.a.as4D.data sameElements expectedActivation.as4D.data 124 | } 125 | 126 | it should "do backward propagation from max pooling layer" in { 127 | // given 128 | val inputShape = images.shape4D 129 | val convLayer = Conv2D[Double]( 130 | f = testActivation, 131 | filterCount = 3, 132 | kernel = (2, 2), 133 | strides = (1, 1) 134 | ).init(inputShape.toList, testInit, adam) 135 | 136 | // when 137 | val a = convLayer(images) 138 | val poolingLayer = MaxPool[Double](padding = false).init(convLayer.shape) 139 | val pooled = poolingLayer(a.a) 140 | 141 | val maxPoolDelta = Array.fill(images.length)( 142 | Array( 143 | Array( 144 | Array(1d, 2) 145 | ), 146 | Array( 147 | Array(7d, 1) 148 | ), 149 | Array( 150 | Array(4d, 8) 151 | ) 152 | ) 153 | ) 154 | val Gradient(convDelta, _, _) = poolingLayer.backward(pooled, maxPoolDelta.as4D, None) 155 | val Gradient(delta, Some(wGrad), Some(bGrad)) = convLayer.backward(a, convDelta, None) 156 | val Some(weightsShape) = convLayer.w.map(_.shape) 157 | 158 | // then 159 | weightsShape should ===(wGrad.shape) 160 | val expectedConvGrad = Tensor4D(Array( 161 | Array.fill(3)(Array( 162 | Array(6d, 8), 163 | Array(12d,14) 164 | )), 165 | Array.fill(3)(Array( 166 | Array(3.0,4.0), 167 | Array(6.0,7.0) 168 | )), 169 | Array.fill(3)(Array( 
170 | Array(24.0,32.0), 171 | Array(48.0,56.0) 172 | )) 173 | )).map(_ * images.length).as4D.data 174 | 175 | wGrad.as4D.data should ===(expectedConvGrad) 176 | val expectedDelta = Array.fill(2)( 177 | Array.fill(3)( 178 | Array( 179 | Array(0.0, 0.0, 0.0, 0.0), 180 | Array(0.0, 11.0, 11.0, 0.0), 181 | Array(0.0, 11.0, 11.0, 0.0) 182 | ) 183 | )) 184 | delta.as4D.data should===(expectedDelta) 185 | 186 | bGrad.shape should ===(List(3)) 187 | val expectedBias = Array(2d,1,8).map(_ * images.length) 188 | bGrad.as1D.data should ===(expectedBias) 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/main/scala/ml/network/ann.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | 7 | import Model._ 8 | import Sequential._ 9 | 10 | import scala.collection.mutable.ListBuffer 11 | import scala.reflect.ClassTag 12 | import scala.util.Random 13 | 14 | /* 15 | * z - before activation = w * x 16 | * a - activation value 17 | */ 18 | case class Activation[T](x: Tensor[T], z: Tensor[T], a: Tensor[T]) 19 | 20 | sealed trait Model[T]: 21 | def reset(): Model[T] 22 | def train(x: Tensor[T], y: Tensor[T], epochs: Int, shuffle: Boolean = true): Model[T] 23 | def layers: List[Layer[T]] 24 | def predict(x: Tensor[T], customLayers: List[Layer[T]] = layers): Tensor[T] 25 | def apply(x: Tensor[T], customLayers: List[Layer[T]] = layers): Tensor[T] = 26 | predict(x, customLayers) 27 | def history: TrainHistory[T] 28 | def metricValues: MetricValues[T] 29 | 30 | object Model: 31 | def getAvgLoss[T: ClassTag](losses: List[T])(using n: Fractional[T]): T = 32 | castFromTo[Double, T](n.toDouble(losses.sum) / losses.length) 33 | 34 | object Sequential: 35 | def activate[T: Numeric: ClassTag]( 36 | input: Tensor[T], 37 | layers: List[Layer[T]] 38 | ): List[Activation[T]] = 39 | layers 40 | .foldLeft(input, ListBuffer.empty[Activation[T]]) { 41 | case ((x, acc), layer) => 42 | val act = layer(x) 43 | (act.a, acc :+ act) 44 | } 45 | ._2 46 | .toList 47 | 48 | case class TrainHistory[T](layers: List[List[Layer[T]]] = Nil, losses: List[T] = Nil) 49 | 50 | type MetricValues[T] = List[(Metric[T], List[Double])] 51 | 52 | case class Sequential[T: ClassTag: Fractional, U, V]( 53 | lossFunc: Loss[T], 54 | learningRate: T, 55 | metrics: List[Metric[T]] = Nil, 56 | batchSize: Int = 16, 57 | layerStack: List[Int] => List[Layer[T]] = _ => List.empty[Layer[T]], 58 | layers: List[Layer[T]] = Nil, 59 | history: TrainHistory[T] = TrainHistory[T](), 60 | metricValues: MetricValues[T] = Nil, 61 | gradientClipping: GradientClipping[T] = GradientClippingApi.noClipping[T], 62 | cfg: Option[OptimizerCfg[T]] = None, 63 | printStepTps: Boolean = false 64 | )(using optimizer: Optimizer[U], initializer: ParamsInitializer[T, V]) extends Model[T]: 65 | 66 | private val optimizerCfg = 67 | cfg.getOrElse(OptimizerCfg(learningRate = learningRate, gradientClipping, AdamCfg.default)) 68 | 69 | def withCfg(cfg: OptimizerCfg[T]) = 70 | copy(cfg = Some(cfg)) 71 | 72 | def predict(x: Tensor[T], inputLayers: List[Layer[T]] = layers): Tensor[T] = 73 | activate(x, inputLayers).last.a 74 | 75 | def loss(x: Tensor[T], y: Tensor[T], w: List[Layer[T]]): T = 76 | val predicted = predict(x, w) 77 | lossFunc(y, predicted) 78 | 79 | def add(layer: Layer[T]): Sequential[T, U, V] = 80 | copy(layerStack = inputShape => 81 | val currentLayers = 
layerStack(inputShape) 82 | val prevShape = currentLayers.lastOption.map(_.shape).getOrElse(inputShape) 83 | val initialized = layer match 84 | case o: Optimizable[_] => o.init(prevShape, initializer, optimizer) 85 | case _ => layer.init(prevShape) 86 | (currentLayers :+ initialized) 87 | ) 88 | 89 | private def trainEpoch( 90 | batches: Array[(Tensor[T], Tensor[T])], 91 | layers: List[Layer[T]], 92 | epoch: Int 93 | ) = 94 | val (trained, losses, metricValue, _) = 95 | batches.zipWithIndex.foldLeft( 96 | layers, 97 | ListBuffer.empty[T], 98 | ListBuffer.fill(metrics.length)(0), 99 | 0L) { 100 | case ((layers, batchLoss, epochMetrics, stepDuration), ((x, y), i)) => 101 | // forward 102 | val start = System.currentTimeMillis() 103 | val activations = activate(x, layers) 104 | val predicted = activations.last.a 105 | val error = predicted - y 106 | val loss = lossFunc(y, predicted) 107 | 108 | // backward 109 | val updated = optimizer.updateWeights( 110 | layers, 111 | activations, 112 | error, 113 | optimizerCfg, 114 | (i + 1) * epoch 115 | ) 116 | 117 | // update metrics 118 | val matches = metrics 119 | .map(_.matches(y, predicted)) 120 | .zip(epochMetrics).map(_ + _) 121 | val duration = stepDuration + (System.currentTimeMillis() - start) 122 | printEpochPerformance(i + 1, duration) 123 | 124 | (updated, batchLoss :+ loss, matches.to(ListBuffer), duration) 125 | } 126 | (trained, getAvgLoss(losses.toList), metricValue) 127 | 128 | inline private def printEpochPerformance(step: Int, duration: Long) = 129 | if printStepTps && step % 50 == 0 then 130 | println(s"${step.toDouble / (duration / 1000d)} steps/sec") 131 | 132 | def train(x: Tensor[T], y: Tensor[T], epochs: Int, shuffle: Boolean = true): Model[T] = 133 | lazy val actualBatches = y.batches(batchSize).toArray 134 | lazy val batches = x.batches(batchSize).zip(actualBatches).toArray 135 | def getBatches = if shuffle then Random.shuffle(batches).toArray else batches 136 | val currentLayers = getOrInitLayers(x.shape) 137 | val initialMetrics = metrics.map(_ -> List.empty[Double]) 138 | println(s"Running $epochs epochs") 139 | 140 | val (updatedLayers, lHistory, epochLosses, metricValues) = 141 | (1 to epochs).foldLeft(currentLayers, ListBuffer.empty[List[Layer[T]]], ListBuffer.empty[T], initialMetrics) { 142 | case ((layers, lHistory, losses, trainingMetrics), epoch) => 143 | val start = System.currentTimeMillis() 144 | val (trainedLayers, avgLoss, epochMatches) = trainEpoch(getBatches, layers, epoch) 145 | val duration = System.currentTimeMillis() - start 146 | 147 | val (epochMetrics, epochMetricAvg) = updateMetrics(epochMatches.toList, trainingMetrics, x.length) 148 | printMetrics(epoch, epochs, avgLoss, epochMetricAvg, duration) 149 | 150 | (trainedLayers, lHistory :+ trainedLayers, losses :+ avgLoss, epochMetrics) 151 | } 152 | 153 | copy( 154 | layers = updatedLayers, 155 | history = history.copy(losses = epochLosses.toList, layers = lHistory.toList), 156 | metricValues = metricValues 157 | ) 158 | 159 | private def updateMetrics( 160 | observedMatches: List[Int], 161 | currentMetrics: MetricValues[T], 162 | samples: Int 163 | ) = 164 | val observedAvg = metrics.zip(observedMatches).map((m, matches) => m -> m.average(samples, matches)) 165 | val updatedMetrics = observedAvg.zip(currentMetrics).map { 166 | case ((_, v), (currentMetric, values)) => currentMetric -> (values :+ v) 167 | } 168 | (updatedMetrics, observedAvg) 169 | 170 | private def printMetrics(epoch: Int, epochs: Int, avgLoss: T, values: List[(Metric[T], Double)], 
duration: Long) = 171 | val metricsStat = values 172 | .map((m, avg) => s"${m.name}: $avg") 173 | .mkString(", metrics: [", ";", "]") 174 | println( 175 | s"epoch: $epoch/$epochs, duration: ${duration/1000} sec, avg. loss: $avgLoss${if metrics.nonEmpty then metricsStat else ""}" 176 | ) 177 | 178 | def reset(): Model[T] = 179 | copy(layers = Nil) 180 | 181 | private def getOrInitLayers(inputShape: List[Int]) = 182 | if layers.isEmpty then layerStack(inputShape) 183 | else layers -------------------------------------------------------------------------------- /src/main/scala/ml/network/layers.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | 6 | import scala.collection.mutable.ListBuffer 7 | import scala.reflect.ClassTag 8 | import scala.math.Numeric.Implicits._ 9 | import scala.collection.parallel.CollectionConverters._ 10 | 11 | final case class Gradient[T]( 12 | delta: Tensor[T], 13 | w: Option[Tensor[T]] = None, 14 | b: Option[Tensor[T]] = None 15 | ) 16 | 17 | trait Layer[T]: 18 | val f: ActivationFunc[T] = ActivationFuncApi.linear 19 | val shape: List[Int] 20 | 21 | def init[U, V](prevShape: List[Int]): Layer[T] = this 22 | def apply(x: Tensor[T]): Activation[T] 23 | def backward(a: Activation[T], prevDelta: Tensor[T], preWeight: Option[Tensor[T]]): Gradient[T] 24 | 25 | override def toString() = 26 | s"\nf = ${f.name},\nshape = $shape" 27 | 28 | trait Optimizable[T] extends Layer[T]: 29 | val w: Option[Tensor[T]] 30 | val b: Option[Tensor[T]] 31 | val optimizerParams: Option[OptimizerParams[T]] 32 | 33 | def init[U, V](prevShape: List[Int], initializer: ParamsInitializer[T, V], optimizer: Optimizer[U]): Layer[T] 34 | 35 | def update(wGradient: Tensor[T], bGradient: Tensor[T], optimizerParams: Option[OptimizerParams[T]] = None): Layer[T] 36 | 37 | override def toString() = 38 | s"(${super.toString},\nweight = $w,\nbias = $b)" 39 | 40 | case class Dense[T: ClassTag]( 41 | override val f: ActivationFunc[T] = ActivationFuncApi.linear[T], 42 | units: Int = 1, 43 | shape: List[Int] = Nil, 44 | w: Option[Tensor[T]] = None, 45 | b: Option[Tensor[T]] = None, 46 | optimizerParams: Option[OptimizerParams[T]] = None 47 | )(using n: Fractional[T]) extends Optimizable[T]: 48 | 49 | override def init[U, V](prevShape: List[Int], initializer: ParamsInitializer[T, V], optimizer: Optimizer[U]): Layer[T] = 50 | val inputs = prevShape.drop(1).reduce(_ * _) 51 | val w = initializer.weights(inputs, units) 52 | val b = initializer.biases(units) 53 | val optimizerParams = optimizer.init(w, b) 54 | copy(w = Some(w), b = Some(b), shape = List(inputs, units), optimizerParams = optimizerParams) 55 | 56 | override def apply(x: Tensor[T]): Activation[T] = 57 | val z = x * w + b 58 | val a = f(z) 59 | Activation(x, z, a) 60 | 61 | override def update(wGradient: Tensor[T], bGradient: Tensor[T], optimizerParams: Option[OptimizerParams[T]] = None): Layer[T] = 62 | val updatedW = w.map(_ - wGradient) 63 | val updatedB = b.map(_ - bGradient) 64 | copy(w = updatedW, b = updatedB, optimizerParams = optimizerParams) 65 | 66 | override def backward(a: Activation[T], prevDelta: Tensor[T], prevWeight: Option[Tensor[T]]): Gradient[T] = 67 | val delta = (prevWeight match 68 | case Some(pw) => prevDelta * pw.T 69 | case None => prevDelta 70 | ) |*| f.derivative(a.z) 71 | 72 | val wGradient = Some(a.x.T * delta) 73 | val bGradient = Some(delta) 74 | Gradient(delta, wGradient, bGradient) 75 | 76 | case class 
Conv2D[T: ClassTag]( 77 | override val f: ActivationFunc[T], 78 | filterCount: Int = 1, 79 | kernel: (Int, Int) = (2, 2), 80 | strides: (Int, Int) = (1, 1), 81 | shape: List[Int] = Nil, 82 | w: Option[Tensor[T]] = None, 83 | b: Option[Tensor[T]] = None, 84 | optimizerParams: Option[OptimizerParams[T]] = None 85 | )(using n: Fractional[T]) extends Optimizable[T]: 86 | 87 | override def init[U, V](prevShape: List[Int], initializer: ParamsInitializer[T, V], optimizer: Optimizer[U]): Conv2D[T] = 88 | val images :: channels :: height :: width :: _ = prevShape 89 | val w = initializer.weights4D(List(filterCount, channels, kernel._1, kernel._2)) 90 | val b = initializer.biases(filterCount) 91 | val optimizerParams = optimizer.init(w, b) 92 | val rows = (height - kernel._1) / strides._1 + 1 93 | val cols = (width - kernel._2) / strides._2 + 1 94 | val shape = List(images, filterCount, rows, cols) 95 | copy(w = Some(w), b = Some(b), shape = shape, optimizerParams = optimizerParams) 96 | 97 | override def apply(x: Tensor[T]): Activation[T] = 98 | val z = (w, b) match 99 | case (Some(w), Some(b)) => forward(kernel, strides, x, w, b) 100 | case _ => x // does nothing when one of the params is empty 101 | val a = f(z) 102 | Activation(x, z, a) 103 | 104 | private def forward(kernel: (Int, Int), stride: (Int, Int), x: Tensor[T], w: Tensor[T], b: Tensor[T]): Tensor[T] = 105 | val (images, filters) = (x.as4D, w.as4D) 106 | 107 | def filterImage(image: Array[Array[Array[T]]]) = 108 | filters.data.zip(b.as1D.data).map { (f, b) => 109 | val filtered = f.zip(image).map { (fc, ic) => 110 | conv(fc.as2D, ic.as2D, kernel, stride) 111 | }.reduce(_ + _) 112 | filtered + b.asT 113 | } 114 | 115 | images.data.par.map(filterImage).toArray.as4D 116 | 117 | private def conv(filterChannel: Tensor2D[T], imageChannel: Tensor2D[T], kernel: (Int, Int), stride: (Int, Int)) = 118 | val filtered = 119 | for row <- imageRegions(imageChannel, kernel, stride) yield 120 | for (region, _, _) <- row yield 121 | (region |*| filterChannel).sum 122 | 123 | filtered.as2D 124 | 125 | private def fullConv(filter: Tensor2D[T], loss: Tensor2D[T], kernel: (Int, Int), stride: (Int, Int), rows: Int, cols: Int) = 126 | val out = Array.ofDim(rows, cols) 127 | 128 | for i <- 0 until kernel._1 do 129 | for j <- 0 until kernel._2 do 130 | val delta = filter * loss.data(i)(j) 131 | val (x, y) = (i * stride._1, j * stride._2) 132 | 133 | val iter = delta.as2D.data.flatten.iterator 134 | for k <- x until x + kernel._1 do 135 | for l <- y until y + kernel._2 do 136 | out(k)(l) += iter.next 137 | 138 | out.as2D 139 | 140 | private def calcGradient(loss: Tensor2D[T], image: Tensor2D[T], kernel: (Int, Int), stride: (Int, Int)) = 141 | val grad = 142 | for (region, i, j) <- imageRegions(image, kernel, stride).flatten 143 | yield region * loss.data(i)(j) 144 | 145 | grad.reduce(_ + _).as2D 146 | 147 | private def imageRegions(image: Tensor2D[T], kernel: (Int, Int), stride: (Int, Int)) = 148 | val (rows, cols) = image.shape2D 149 | for i <- 0 to rows - kernel._1 by stride._1 yield 150 | for j <- 0 to cols - kernel._2 by stride._2 yield 151 | (image.slice((i, i + kernel._1), (j, j + kernel._2)).as2D, i, j) 152 | 153 | override def backward(a: Activation[T], prevDelta: Tensor[T], preWeight: Option[Tensor[T]]): Gradient[T] = 154 | (w, b) match 155 | case (Some(w), Some(b)) => 156 | val prevLoss = prevDelta.as4D // nImages, filters, rows, cols 157 | val x = a.x.as4D 158 | 159 | def imageGrad(imageChannels: Array[Array[Array[T]]], lossChannels: 
Array[Array[Array[T]]]) = 160 | lossChannels.map { lc => 161 | imageChannels.map { ic => 162 | calcGradient(lc.as2D, ic.as2D, kernel, strides) 163 | } 164 | } 165 | 166 | val wGradient = x.data.zip(prevLoss.data) 167 | .par.map(imageGrad) 168 | .reduce { 169 | (image1, image2) => 170 | image1.zip(image2).map { (channels1, channels2) => 171 | channels1.zip(channels2).map(_ + _) 172 | } 173 | }.as4D 174 | 175 | val bGradient = prevLoss.data 176 | .par.map(_.map(_.sum)) 177 | .reduce(_ + _) 178 | .as1D 179 | 180 | val (_, _, rows, cols) = x.shape4D 181 | val delta = prevLoss.data.par.map { lossChannels => 182 | w.as4D.data.map { channels => 183 | lossChannels.zip(channels).map { (lc, fc) => 184 | fullConv(fc.as2D, lc.as2D, kernel, strides, rows, cols) 185 | }.reduce(_ + _) 186 | } 187 | }.toArray.as4D 188 | 189 | Gradient(delta, Some(wGradient), Some(bGradient)) 190 | case _ => 191 | Gradient(prevDelta) 192 | 193 | override def update(wGradient: Tensor[T], bGradient: Tensor[T], optimizerParams: Option[OptimizerParams[T]] = None): Layer[T] = 194 | val updatedW = w.map(_ - wGradient) 195 | val updatedB = b.map(_ - bGradient) 196 | copy(w = updatedW, b = updatedB, optimizerParams = optimizerParams) 197 | 198 | case class MaxPool[T: ClassTag: Numeric]( 199 | window: (Int, Int) = (2, 2), 200 | strides: (Int, Int) = (1, 1), 201 | shape: List[Int] = Nil, 202 | shape2D: (Int, Int) = (0, 0), 203 | padding: Boolean = true 204 | ) extends Layer[T]: 205 | 206 | override def init[U, V](prevShape: List[Int]): Layer[T] = 207 | val (a :: b :: rows :: cols :: _) = prevShape 208 | val pad = if padding then 1 else 0 209 | val height = (rows - window._1 + pad) / strides._1 + 1 210 | val width = (cols - window._2 + pad) / strides._2 + 1 211 | val shape = List(a, b, height, width) 212 | copy(shape = shape, shape2D = (height, width)) 213 | 214 | def apply(x: Tensor[T]): Activation[T] = 215 | val pooled = x.as4D.data.map(_.map(c => poolMax(c.as2D))).as4D 216 | Activation(x, pooled, pooled) 217 | 218 | private def imageRegions(image: Tensor2D[T], window: (Int, Int), strides: (Int, Int)) = 219 | val (rows, cols) = shape2D 220 | for i <- 0 until rows by strides._1 yield 221 | for j <- 0 until cols by strides._2 yield 222 | (image.slice((i, i + window._1), (j, j + window._2)).as2D, i, j) 223 | 224 | private def poolMax(image: Tensor2D[T]): Tensor2D[T] = 225 | val (rows, cols) = shape2D 226 | val out = Array.ofDim(rows, cols) 227 | val pooled = 228 | for (region, i, j) <- imageRegions(image, window, strides).flatten yield 229 | out(i)(j) = region.max 230 | out.as2D 231 | 232 | private def maxIndex(matrix: Tensor2D[T]): (Int, Int) = 233 | val maxPerRow = matrix.data.zipWithIndex.map((row, i) => (row.max, i, row.indices.maxBy(row))) 234 | maxPerRow.maxBy(_._1).tail 235 | 236 | def backward(a: Activation[T], prevDelta: Tensor[T], preWeight: Option[Tensor[T]]): Gradient[T] = 237 | val images = a.x.as4D.data 238 | val delta = images.zip(prevDelta.as4D.data).par.map { (imageChannels, deltaChannels) => 239 | imageChannels.zip(deltaChannels).map { (ic, dc) => 240 | val image = ic.as2D 241 | val out = image.zero.as2D.data 242 | for (region, i, j) <- imageRegions(image, window, strides).flatten yield 243 | val (a, b) = maxIndex(region) 244 | out(i + a)(j + b) = dc(i)(j) 245 | out 246 | } 247 | } 248 | Gradient(delta.toArray.as4D) 249 | 250 | case class Flatten2D[T: ClassTag: Numeric]( 251 | shape: List[Int] = Nil, 252 | prevShape: List[Int] = Nil 253 | ) extends Layer[T]: 254 | 255 | override def init[U, V](prevShape: List[Int]): 
Layer[T] = 256 | val (head :: tail ) = prevShape 257 | val shape = List(head, tail.reduce(_ * _)) 258 | copy(shape = shape, prevShape = prevShape) 259 | 260 | def apply(x: Tensor[T]): Activation[T] = 261 | val flat = x.as2D 262 | Activation(x, flat, flat) 263 | 264 | def backward(a: Activation[T], prevDelta: Tensor[T], prevWeight: Option[Tensor[T]]): Gradient[T] = 265 | val delta = (prevWeight match 266 | case Some(pw) => prevDelta * pw.T 267 | case None => prevDelta 268 | ) //|*| f.derivative(a.z) //TODO: is any z multiply required here? 269 | 270 | val (filters :: rows :: cols :: _) = prevShape.drop(1) 271 | val unflatten = delta.reshape(List(filters, rows, cols)) 272 | Gradient(unflatten) -------------------------------------------------------------------------------- /src/main/scala/ml/tensors/ops.scala: -------------------------------------------------------------------------------- 1 | package ml.tensors 2 | 3 | import ml.tensors.api._ 4 | import ml.transformation.castFromTo 5 | 6 | import scala.reflect.ClassTag 7 | import scala.collection.mutable.ArrayBuffer 8 | import math.Numeric.Implicits.infixNumericOps 9 | import math.Ordering.Implicits.infixOrderingOps 10 | import math.Fractional.Implicits.infixFractionalOps 11 | import math.Integral.Implicits.infixIntegralOps 12 | 13 | private trait genOps: 14 | extension [T: ClassTag: Numeric](t: Tensor[T]) 15 | // dot product 16 | def *(that: Tensor[T]): Tensor[T] = TensorOps.mul(t, that) 17 | def *(that: Option[Tensor[T]]): Tensor[T] = TensorOps.optMul(t, that) 18 | def *(that: T): Tensor[T] = TensorOps.mul(t, Tensor0D(that)) 19 | def -(that: T): Tensor[T] = TensorOps.subtract(t, Tensor0D(that)) 20 | def -(that: Tensor[T]): Tensor[T] = TensorOps.subtract(t, that) 21 | def +(that: Tensor[T]): Tensor[T] = TensorOps.plus(t, that) 22 | def +(that: Option[Tensor[T]]): Tensor[T] = TensorOps.optPlus(t, that) 23 | def +(that: T): Tensor[T] = TensorOps.plus(t, Tensor0D(that)) 24 | def sum: T = TensorOps.sum(t) 25 | def split(fraction: Float): (Tensor[T], Tensor[T]) = TensorOps.split(fraction, t) 26 | 27 | // Hadamard product 28 | def multiply(that: Tensor[T]): Tensor[T] = TensorOps.multiply(t, that) 29 | def multiply(that: Option[Tensor[T]]): Tensor[T] = TensorOps.optMultiply(t, that) 30 | def |*|(that: Tensor[T]): Tensor[T] = TensorOps.multiply(t, that) 31 | def |*|(that: Option[Tensor[T]]): Tensor[T] = TensorOps.optMultiply(t, that) 32 | 33 | def batches(batchSize: Int): Iterator[Tensor[T]] = TensorOps.batches(t, batchSize) 34 | def equalRows(that: Tensor[T]): Int = TensorOps.equalRows(t, that) 35 | def clipInRange(min: T, max: T): Tensor[T] = TensorOps.clipInRange(t, min, max) 36 | def :**(to: Int): Tensor[T] = TensorOps.pow(t, to) 37 | def sqr: Tensor[T] = TensorOps.pow(t, 2) 38 | def sqrt: Tensor[T] = TensorOps.sqrt(t) 39 | def zero: Tensor[T] = TensorOps.zero(t) 40 | def argMax: Tensor[T] = TensorOps.argMax(t) 41 | def outer(that: Tensor[T]) = TensorOps.outer(t, that) 42 | def flatten: Tensor[T] = TensorOps.flatten(t) 43 | def diag: Tensor[T] = TensorOps.diag(t) 44 | def sumRows: Tensor[T] = TensorOps.sumRows(t) 45 | def sumCols: Tensor[T] = TensorOps.sumCols(t) 46 | def max: T = TensorOps.max(t) 47 | def reshape(shape: List[Int]): Tensor[T] = TensorOps.reshape(t, shape) 48 | 49 | extension [T: ClassTag: Fractional](t: Tensor[T]) 50 | def clipByNorm(norm: T): Tensor[T] = TensorOps.clipByNorm(t, norm) 51 | def /(that: Tensor[T]): Tensor[T] = TensorOps.div(t, that) 52 | def :/(that: T): Tensor[T] = TensorOps.div(t, Tensor0D(that)) 53 | 54 | 
extension [T: ClassTag](t: Tensor[T]) 55 | def T: Tensor[T] = TensorOps.transpose(t) 56 | def map[U: ClassTag](f: T => U): Tensor[U] = TensorOps.map[T, U](t, f) 57 | def mapRow[U: ClassTag](f: Array[T] => Array[U]): Tensor[U] = TensorOps.mapRow[T, U](t, f) 58 | 59 | object ops extends genOps: 60 | extension [T: ClassTag](t: Tensor2D[T]) 61 | def col(i: Int): Tensor1D[T] = Tensor1D(TensorOps.col(t.data, i)) 62 | def T: Tensor2D[T] = TensorOps.transpose(t).asInstanceOf[Tensor2D[T]] 63 | def slice( 64 | rows: Option[(Int, Int)], 65 | cols: Option[(Int, Int)] 66 | ): Tensor2D[T] = 67 | Tensor2D(t.data.slice(rows, cols)) 68 | def slice( 69 | rows: (Int, Int), 70 | cols: (Int, Int) 71 | ): Tensor2D[T] = 72 | Tensor2D(TensorOps.sliceArr(t.data, rows, cols)) 73 | 74 | extension [T: ClassTag: Numeric](t: Tensor2D[T]) 75 | def |*|(that: Tensor2D[T]): Tensor2D[T] = TensorOps.multiply(t, that).asInstanceOf[Tensor2D[T]] 76 | def +(that: Tensor[T]): Tensor2D[T] = TensorOps.plus(t, that).asInstanceOf[Tensor2D[T]] 77 | 78 | extension [T: ClassTag](t: Tensor[T]) 79 | def as0D: Tensor0D[T] = TensorOps.as0D(t) 80 | def as1D: Tensor1D[T] = TensorOps.as1D(t) 81 | def as2D: Tensor2D[T] = TensorOps.as2D(t) 82 | def as3D: Tensor3D[T] = TensorOps.as3D(t) 83 | def as4D: Tensor4D[T] = TensorOps.as4D(t) 84 | 85 | extension [T: ClassTag](t: T) 86 | def asT: Tensor[T] = Tensor0D(t) 87 | def as0D: Tensor0D[T] = Tensor0D(t) 88 | def as1D: Tensor1D[T] = Tensor1D(Array(t)) 89 | def as2D: Tensor2D[T] = Tensor2D(Array(Array(t))) 90 | 91 | extension [T: ClassTag](t: T)(using n: Numeric[T]) 92 | def **(to: Int): T = castFromTo[Double, T](math.pow(n.toDouble(t), to)) 93 | 94 | implicit class Tensor0DOps[T: ClassTag: Numeric](val t: T): 95 | // dot product 96 | def *(that: Tensor[T]): Tensor[T] = TensorOps.mul(Tensor0D(t), that) 97 | def -(that: Tensor[T]): Tensor[T] = TensorOps.subtract(Tensor0D(t), that) 98 | def +(that: Tensor[T]): Tensor[T] = TensorOps.plus(Tensor0D(t), that) 99 | 100 | extension [T: ClassTag: Numeric](a: Array[T]) 101 | def as1D: Tensor1D[T] = Tensor1D(a) 102 | def as2D: Tensor2D[T] = Tensor2D(a) 103 | 104 | extension [T: ClassTag](a: Array[T])(using n: Numeric[T]) 105 | def +(b: Array[T]): Array[T] = a.zip(b).map(n.plus) 106 | 107 | extension [T: ClassTag: Numeric](a: Array[Array[T]]) 108 | def as2D: Tensor2D[T] = Tensor2D(a) 109 | def sum: T = a.map(_.sum).sum 110 | 111 | extension [T: ClassTag: Numeric](a: IndexedSeq[IndexedSeq[T]]) 112 | def as2D: Tensor2D[T] = Tensor2D(a.map(_.toArray).toArray) 113 | 114 | extension [T: ClassTag: Numeric](a: Array[Tensor2D[T]]) 115 | def as3D: Tensor3D[T] = Tensor3D(a:_*) 116 | 117 | extension [T: ClassTag: Numeric](a: Array[Array[Array[T]]]) 118 | def as3D: Tensor3D[T] = Tensor3D(a) 119 | 120 | extension [T: ClassTag: Numeric](a: Array[Array[Array[Array[T]]]]) 121 | def as4D: Tensor4D[T] = Tensor4D(a) 122 | 123 | extension [T: ClassTag: Numeric](a: Array[Array[Tensor2D[T]]]) 124 | def as4D: Tensor4D[T] = Tensor4D(a.map(_.map(_.data))) 125 | 126 | extension [T: ClassTag](a: Array[Array[T]]) 127 | def col(i: Int): Array[T] = TensorOps.col(a, i) 128 | def slice( 129 | rows: Option[(Int, Int)] = None, 130 | cols: Option[(Int, Int)] = None 131 | ): Array[Array[T]] = TensorOps.slice(a, rows, cols) 132 | 133 | extension [T: ClassTag: Numeric](pair: (Tensor[T], Tensor[T])) 134 | def map2[U: ClassTag: Numeric](f: (T, T) => U): Tensor[U] = 135 | TensorOps.map2(pair._1, pair._2, f) 136 | 137 | def split( 138 | fraction: Float 139 | ): ((Tensor[T], Tensor[T]), (Tensor[T], 
Tensor[T])) = 140 | TensorOps.split(fraction, pair) 141 | 142 | extension [T: ClassTag](t: Tensor1D[T]) 143 | def batchColumn(batchSize: Int): Iterator[Array[T]] = 144 | t.data.grouped(batchSize) 145 | 146 | object TensorOps: 147 | 148 | def subtract[T: ClassTag: Numeric](a: Tensor[T], b: Tensor[T]): Tensor[T] = 149 | (a, b) match 150 | case (Tensor1D(data), Tensor1D(data2)) => 151 | checkShapeEquality(a, b) 152 | Tensor1D(data.zip(data2).map(_ - _)) 153 | case (Tensor2D(data), Tensor2D(data2)) => 154 | checkShapeEquality(a, b) 155 | Tensor2D(matrixMinusMatrix(data, data2)) 156 | case (Tensor2D(data), Tensor0D(data2)) => // broadcasting 157 | Tensor2D(data.map(_.map(_ - data2))) 158 | case (Tensor0D(data), Tensor2D(data2)) => // broadcasting 159 | Tensor2D(data2.map(_.map(v => data - v))) 160 | case (Tensor1D(data), Tensor0D(data2)) => // broadcasting 161 | Tensor1D(data.map(_ - data2)) 162 | case (t1 @ Tensor2D(_), t2 @ Tensor1D(_)) => // broadcasting 163 | matrixMinusVector(t1, t2) 164 | case (Tensor4D(data), Tensor4D(data2)) => 165 | checkShapeEquality(a, b) 166 | val res = data.zip(data2).map { (cubes, cubes2) => 167 | cubes.zip(cubes2).map { (mat1, mat2) => 168 | matrixMinusMatrix(mat1, mat2) 169 | } 170 | } 171 | Tensor4D(res) 172 | case (t1, t2) => 173 | sys.error(s"Not implemented for\n$t1 and\n$t2") 174 | 175 | private def matrixMinusMatrix[T: ClassTag: Numeric](a: Array[Array[T]], b: Array[Array[T]]): Array[Array[T]] = 176 | val rows = a.length 177 | val cols = a.headOption.map(_.length).getOrElse(0) 178 | val res = Array.ofDim[T](rows, cols) 179 | 180 | for i <- a.indices do 181 | for j <- 0 until cols do 182 | res(i)(j) = a(i)(j) - b(i)(j) 183 | res 184 | 185 | private def matrixMinusVector[T: Numeric: ClassTag]( 186 | matrix: Tensor2D[T], 187 | vector: Tensor1D[T] 188 | ) = 189 | val cols = matrix.shape2D._2 190 | assert( 191 | cols == vector.length, 192 | s"trailing axis must have the same size, $cols != ${vector.length}" 193 | ) 194 | val res = matrix.data.map(_.zip(vector.data).map{(a, b) => a - b }) 195 | Tensor2D(res) 196 | 197 | private def checkShapeEquality[T](a: Tensor[T], b: Tensor[T]) = 198 | assert(a.shape == b.shape, s"Tensors must have the same shape: ${a.shape} != ${b.shape}") 199 | 200 | def optPlus[T: ClassTag: Numeric](a: Tensor[T], b: Option[Tensor[T]]): Tensor[T] = 201 | b.fold(a)(t => plus(a, t)) 202 | 203 | def plus[T: ClassTag: Numeric](a: Tensor[T], b: Tensor[T]): Tensor[T] = 204 | (a, b) match 205 | // broadcasting 206 | case (Tensor2D(data), Tensor0D(data2)) => 207 | Tensor2D(data.map(_.map(_ + data2))) 208 | case (Tensor0D(data), Tensor2D(data2)) => 209 | Tensor2D(data2.map(_.map(_ + data))) 210 | case (t1 @ Tensor2D(_), t2 @ Tensor1D(_)) => 211 | matrixPlusVector(t1, t2) 212 | case (t1 @ Tensor1D(_), t2 @ Tensor2D(_)) => 213 | matrixPlusVector(t2, t1) 214 | case (Tensor1D(data), Tensor0D(data2)) => 215 | Tensor1D(data.map(_ + data2)) 216 | case (Tensor0D(data), Tensor1D(data2)) => 217 | Tensor1D(data2.map(_ + data)) 218 | case (Tensor4D(data), Tensor0D(data2)) => 219 | Tensor4D(data.map(_.map(_.map(_.map(_ + data2))))) 220 | 221 | case (Tensor1D(data), Tensor1D(data2)) => 222 | checkShapeEquality(a, b) 223 | val res = Array.ofDim(data.length) 224 | for i <- 0 until data.length do 225 | res(i) = data(i) + data2(i) 226 | Tensor1D(res) 227 | case (t1 @ Tensor2D(data), Tensor2D(data2)) => 228 | checkShapeEquality(a, b) 229 | val res = matrixPlusMatrix(data, data2) 230 | Tensor2D(res) 231 | case (Tensor4D(data), Tensor4D(data2)) => 232 | 
checkShapeEquality(a, b) 233 | val res = data.zip(data2).map { (cubes1, cubes2) => 234 | cubes1.zip(cubes2).map { (mat1, mat2) => 235 | matrixPlusMatrix(mat1, mat2) 236 | } 237 | } 238 | Tensor4D(res) 239 | case (Tensor0D(data), Tensor0D(data2)) => 240 | Tensor0D(data + data2) 241 | case _ => notImplementedError(a :: b:: Nil) 242 | 243 | private def matrixPlusMatrix[T: ClassTag: Numeric](a: Array[Array[T]], b: Array[Array[T]]): Array[Array[T]] = 244 | val (rows, cols) = (a.length, a.head.length) 245 | val res = Array.ofDim(rows, cols) 246 | for i <- 0 until rows do 247 | for j <- 0 until cols do 248 | res(i)(j) = a(i)(j) + b(i)(j) 249 | res 250 | 251 | private def notImplementedError[T](ts: List[Tensor[T]]) = 252 | sys.error(s"Not implemented for: ${ts.mkString("\n")}") 253 | 254 | private def matrixPlusVector[T: ClassTag: Numeric]( 255 | t1: Tensor2D[T], 256 | t2: Tensor1D[T] 257 | ) = 258 | val (rows, cols) = t1.shape2D 259 | assert( 260 | cols == t2.length, 261 | s"tensors must have the same amount of cols to sum them up element-wise, but were: $cols != ${t2.length}" 262 | ) 263 | val sum = Array.ofDim[T](rows, cols) 264 | for i <- 0 until rows do 265 | for j <- 0 until cols do 266 | sum(i)(j) = t1.data(i)(j) + t2.data(j) 267 | Tensor2D(sum) 268 | 269 | def optMul[T: ClassTag: Numeric](a: Tensor[T], b: Option[Tensor[T]]): Tensor[T] = 270 | b.fold(a)(t => mul(a, t)) 271 | 272 | def mul[T: ClassTag: Numeric](a: Tensor[T], b: Tensor[T]): Tensor[T] = 273 | (a, b) match 274 | case (Tensor0D(data), t) => 275 | scalarMul(t, data) 276 | case (t, Tensor0D(data)) => 277 | scalarMul(t, data) 278 | case (Tensor1D(data), Tensor2D(data2)) => 279 | Tensor2D(matMul(Array(data), data2)) 280 | case (Tensor2D(data), Tensor1D(data2)) => 281 | Tensor2D(matMul(data, asColumn(data2))) 282 | case (Tensor1D(data), Tensor1D(data2)) => 283 | Tensor0D(matMul(Array(data), asColumn(data2)).head.head) 284 | case (Tensor2D(data), Tensor2D(data2)) => 285 | Tensor2D(matMul(data, data2)) 286 | case _ => notImplementedError(a :: b :: Nil) 287 | 288 | private def asColumn[T: ClassTag](a: Array[T]) = a.map(Array(_)) 289 | 290 | def map[T: ClassTag, U: ClassTag](t: Tensor[T], f: T => U): Tensor[U] = 291 | t match 292 | case Tensor0D(data) => Tensor0D(f(data)) 293 | case Tensor1D(data) => Tensor1D(data.map(f)) 294 | case Tensor2D(data) => Tensor2D(data.map(_.map(f))) 295 | case Tensor3D(data) => Tensor3D(data.map(_.map(_.map(f)))) 296 | case Tensor4D(data) => Tensor4D(data.map(_.map(_.map(_.map(f))))) 297 | 298 | def mapRow[T: ClassTag, U: ClassTag](t: Tensor[T], f: Array[T] => Array[U]): Tensor[U] = 299 | t match 300 | case Tensor0D(data) => Tensor0D(f(Array(data)).head) 301 | case Tensor1D(data) => Tensor1D(f(data)) 302 | case Tensor2D(data) => Tensor2D(data.map(f)) 303 | case _ => notImplementedError(t :: Nil) 304 | 305 | private def map2[T: ClassTag, U: ClassTag](a: Array[T], b: Array[T], f: (T, T) => U): Array[U] = 306 | val res = Array.ofDim[U](a.length) 307 | for i <- (0 until a.length).indices do 308 | res(i) = f(a(i), b(i)) 309 | res 310 | 311 | def map2[T: ClassTag: Numeric, U: ClassTag: Numeric](a: Tensor[T], b: Tensor[T], f: (T, T) => U): Tensor[U] = 312 | (a, b) match 313 | case (Tensor0D(data), Tensor0D(data2)) => 314 | Tensor0D(f(data, data2)) 315 | case (Tensor1D(data), Tensor1D(data2)) => 316 | Tensor1D(map2(data, data2, f)) 317 | case (Tensor2D(data), Tensor2D(data2)) => 318 | val res = Array.ofDim[U](data.length, colsCount(data2)) 319 | for i <- (0 until data.length).indices do 320 | res(i) = map2(data(i), 
data2(i), f) 321 | Tensor2D(res) 322 | case _ => 323 | sys.error(s"Both tensors must have the same dimension: ${a.shape} != ${b.shape}") 324 | 325 | private def colsCount[T](a: Array[Array[T]]): Int = 326 | a.headOption.map(_.length).getOrElse(0) 327 | 328 | private def scalarMul[T: ClassTag: Numeric]( 329 | t: Tensor[T], 330 | scalar: T 331 | ): Tensor[T] = 332 | t match 333 | case Tensor0D(data) => Tensor0D(data * scalar) 334 | case Tensor1D(data) => Tensor1D(data.map(_ * scalar)) 335 | case Tensor2D(data) => Tensor2D(data.map(_.map(_ * scalar))) 336 | case Tensor4D(data) => Tensor4D(data.map(_.map(_.map(_.map(_ * scalar))))) 337 | case _ => notImplementedError(t :: Nil) 338 | 339 | private def matMul[T: ClassTag]( 340 | a: Array[Array[T]], 341 | b: Array[Array[T]] 342 | )(using n: Numeric[T]): Array[Array[T]] = 343 | assert( 344 | a.head.length == b.length, 345 | s"The number of columns in the first matrix should be equal to the number of rows in the second, ${a.head.length} != ${b.length}" 346 | ) 347 | val rows = a.length 348 | val cols = colsCount(b) 349 | val res = Array.ofDim[T](rows, cols) 350 | 351 | for i <- (0 until rows).indices do 352 | for j <- (0 until cols).indices do 353 | var sum = n.zero 354 | for k <- b.indices do 355 | sum = sum + (a(i)(k) * b(k)(j)) 356 | res(i)(j) = sum 357 | res 358 | 359 | def as0D[T: ClassTag](t: Tensor[T]): Tensor0D[T] = 360 | t match 361 | case Tensor0D(data) => Tensor0D(data) 362 | case t1 @ Tensor1D(data) => Tensor0D(data.head) 363 | case Tensor2D(data) => Tensor0D(data.head.head) 364 | case _ => notImplementedError(t :: Nil) 365 | 366 | def as1D[T: ClassTag](t: Tensor[T]): Tensor1D[T] = 367 | t match 368 | case Tensor0D(data) => Tensor1D(data) 369 | case t1 @ Tensor1D(_) => t1 370 | case Tensor2D(data) => Tensor1D(data.flatten) 371 | case _ => notImplementedError(t :: Nil) 372 | 373 | def as2D[T: ClassTag](t: Tensor[T]): Tensor2D[T] = 374 | t match 375 | case Tensor0D(data) => Tensor2D(Array(Array(data))) 376 | case Tensor1D(data) => Tensor2D(data.map(Array(_))) 377 | case t1 @ Tensor2D(_) => t1 378 | case t1 @ Tensor4D(data) => Tensor2D(data.map(_.map(_.flatten).flatten)) 379 | case _ => notImplementedError(t :: Nil) 380 | 381 | def as3D[T: ClassTag](t: Tensor[T]): Tensor3D[T] = 382 | t match 383 | case Tensor0D(data) => Tensor3D(Array(Array(Array(data)))) 384 | case Tensor2D(data) => Tensor3D(Array(data)) 385 | case t1 @ Tensor3D(_) => t1 386 | case _ => notImplementedError(t :: Nil) 387 | 388 | def as4D[T: ClassTag](t: Tensor[T]): Tensor4D[T] = 389 | t match 390 | case Tensor0D(data) => Tensor4D(Array(Array(Array(Array(data))))) 391 | case Tensor1D(data) => Tensor4D(Array(Array(data.map(Array(_))))) 392 | case t2 @ Tensor2D(_) => Tensor4D(Array(Array(t2.data))) 393 | case t1 @ Tensor4D(_) => t1 394 | case _ => notImplementedError(t :: Nil) 395 | 396 | def sum[T: Numeric: ClassTag](t: Tensor[T]): T = 397 | t match 398 | case Tensor0D(data) => data 399 | case Tensor1D(data) => data.sum 400 | case Tensor2D(data) => data.map(_.sum).sum 401 | case Tensor4D(data) => data.map(_.map(_.map(_.sum).sum).sum).sum 402 | case _ => notImplementedError(t :: Nil) 403 | 404 | def sumRows[T: Numeric: ClassTag](t: Tensor[T]): Tensor[T] = 405 | t match 406 | case Tensor0D(_) => t 407 | case Tensor1D(_) => t 408 | case Tensor2D(data) => 409 | Tensor1D(data.reduce((a, b) => a.lazyZip(b).map(_ + _).toArray)) 410 | case _ => notImplementedError(t :: Nil) 411 | 412 | def sumCols[T: Numeric: ClassTag](t: Tensor[T]): Tensor[T] = 413 | t match 414 | case Tensor0D(_) 
=> t 415 | case Tensor1D(data) => Tensor0D(data.sum) 416 | case Tensor2D(data) => Tensor2D(data.map(a => Array(a.sum))) 417 | case _ => notImplementedError(t :: Nil) 418 | 419 | def transpose[T: ClassTag](t: Tensor[T]): Tensor[T] = 420 | t match 421 | case t2 @ Tensor2D(data) => 422 | val (rows, cols) = t2.shape2D 423 | val transposed = Array.ofDim[T](cols, rows) 424 | 425 | for i <- (0 until rows).indices do 426 | for j <- (0 until cols).indices do 427 | transposed(j)(i) = data(i)(j) 428 | Tensor2D(transposed) 429 | case Tensor1D(data) => Tensor2D(asColumn(data)) 430 | case _ => t 431 | 432 | def split[T: ClassTag]( 433 | fraction: Float, 434 | t: Tensor[T] 435 | ): (Tensor[T], Tensor[T]) = 436 | t match 437 | case Tensor0D(_) => (t, t) 438 | case Tensor1D(data) => 439 | val (l, r) = splitArray(fraction, data) 440 | (Tensor1D(l), Tensor1D(r)) 441 | case Tensor2D(data) => 442 | val (l, r) = splitArray(fraction, data) 443 | (Tensor2D(l), Tensor2D(r)) 444 | case _ => notImplementedError(t :: Nil) 445 | 446 | private def splitArray[T]( 447 | fraction: Float, 448 | data: Array[T] 449 | ): (Array[T], Array[T]) = 450 | val count = data.length * fraction 451 | val countOrZero = if count < 1 then 0 else count 452 | data.splitAt(data.length - countOrZero.toInt) 453 | 454 | def split[T: ClassTag]( 455 | fraction: Float, 456 | t: (Tensor[T], Tensor[T]) 457 | ): ((Tensor[T], Tensor[T]), (Tensor[T], Tensor[T])) = 458 | val (l, r) = t 459 | assert(l.length == r.length, s"Both tensors must have the same length, ${l.length} != ${r.length}") 460 | split(fraction, l) -> split(fraction, r) 461 | 462 | def multiply[T: Numeric: ClassTag]( 463 | t1: Tensor[T], 464 | t2: Tensor[T] 465 | ): Tensor[T] = 466 | assert( 467 | t1.length == t2.length, 468 | s"Both vectors must have the same length, ${t1.length} != ${t2.length}" 469 | ) 470 | (t1, t2) match 471 | case (Tensor1D(data), Tensor1D(data2)) => 472 | Tensor1D(data.zip(data2).map((a, b) => a * b)) 473 | case (a @ Tensor2D(data), Tensor2D(data2)) => 474 | val (rows, cols) = a.shape2D 475 | val sum = Array.ofDim[T](rows, cols) 476 | for i <- 0 until rows do 477 | for j <- 0 until cols do 478 | sum(i)(j) = data(i)(j) * data2(i)(j) 479 | Tensor2D(sum) 480 | case (a, b) => sys.error(s"Not implemented for:\n$a\nand\n$b") 481 | 482 | def optMultiply[T: Numeric: ClassTag]( 483 | t1: Tensor[T], t2: Option[Tensor[T]] 484 | ): Tensor[T] = 485 | t2.fold(t1)(a => multiply(t1, a)) 486 | 487 | def batches[T: ClassTag: Numeric]( 488 | t: Tensor[T], 489 | batchSize: Int 490 | ): Iterator[Tensor[T]] = 491 | t match 492 | case Tensor0D(data) => Iterator(t) 493 | case Tensor1D(data) => data.grouped(batchSize).map(Tensor1D(_)) 494 | case Tensor2D(data) => data.grouped(batchSize).map(Tensor2D(_)) 495 | case Tensor3D(data) => data.grouped(batchSize).map(Tensor3D(_)) 496 | case Tensor4D(data) => data.grouped(batchSize).map(Tensor4D(_)) 497 | 498 | def equalRows[T: ClassTag](t1: Tensor[T], t2: Tensor[T]): Int = 499 | assert(t1.shape == t2.shape, sys.error(s"Tensors must have the same shape: ${t1.shape} != ${t2.shape}")) 500 | (t1, t2) match 501 | case (Tensor0D(data), Tensor0D(data2)) => 502 | if data == data2 then 1 else 0 503 | case (Tensor1D(data), Tensor1D(data2)) => 504 | data.zip(data2).count(_ == _) 505 | case (Tensor2D(data), Tensor2D(data2)) => 506 | data.zip(data2).foldLeft(0) { case (acc, (a, b)) => if a.sameElements(b) then acc + 1 else acc } 507 | case _ => 508 | sys.error(s"Tensors must be the same dimension: ${t1.shape} != ${t2.shape}") 509 | 510 | def clipInRange[T: 
ClassTag](t: Tensor[T], min: T, max: T)(using n: Numeric[T]): Tensor[T] = 511 | def clipValue(v: T) = 512 | val vAbs = v.abs 513 | if vAbs > max then max 514 | else if vAbs < min then min 515 | else v 516 | 517 | map(t, clipValue) 518 | 519 | def clipByNorm[T: ClassTag](t: Tensor[T], norm: T)(using n: Fractional[T]): Tensor[T] = 520 | val l2norm = castFromTo[Double, T](math.sqrt(castFromTo[T, Double](sum(pow(t, 2))))) 521 | if l2norm > norm then 522 | map(t, v => n.times(v, norm) / l2norm) 523 | else t 524 | 525 | def div[T: ClassTag: Fractional](t1: Tensor[T], t2: Tensor[T]): Tensor[T] = 526 | (t1, t2) match 527 | // broadcasting 528 | case (Tensor2D(data), Tensor0D(data2)) => Tensor2D(data.map(_.map(_ / data2))) 529 | case (Tensor1D(data), Tensor0D(data2)) => Tensor1D(data.map(_ / data2)) 530 | case (Tensor4D(data), Tensor0D(data2)) => Tensor4D(data.map(_.map(_.map(_.map(_ / data2))))) 531 | 532 | case (Tensor0D(data), Tensor0D(data2)) => Tensor0D(data / data2) 533 | case (Tensor1D(data), Tensor1D(data2)) => Tensor1D(data.zip(data2).map(_ /_)) 534 | case (Tensor2D(data), Tensor2D(data2)) => 535 | Tensor2D(matrixDivMatrix(data, data2)) 536 | case (Tensor4D(data), Tensor4D(data2)) => 537 | val res = data.zip(data2).map { (cubes1, cubes2) => 538 | cubes1.zip(cubes2).map { (mat1, mat2) => 539 | matrixDivMatrix(mat1, mat2) 540 | } 541 | } 542 | Tensor4D(res) 543 | case _ => notImplementedError(t1 :: t2 :: Nil) 544 | 545 | private def matrixDivMatrix[T: ClassTag: Fractional](a: Array[Array[T]], b: Array[Array[T]]): Array[Array[T]] = 546 | a.zip(b).map((a, b) => a.zip(b).map(_ / _)) 547 | 548 | def sqrt[T: ClassTag: Numeric](t: Tensor[T]): Tensor[T] = 549 | map(t, v => castFromTo[Double, T](math.sqrt(castFromTo[T, Double](v)))) 550 | 551 | def pow[T: ClassTag](t: Tensor[T], to: Int)(using n: Numeric[T]): Tensor[T] = 552 | def powValue(v: T) = 553 | val res = math.pow(n.toDouble(v), to) 554 | castFromTo[Double, T](res) 555 | def powArray(a: Array[T]) = 556 | a.map(powValue) 557 | def powMatrix(a: Array[Array[T]]) = 558 | a.map(_.map(powValue)) 559 | 560 | t match 561 | case Tensor0D(data) => Tensor0D(powValue(data)) 562 | case Tensor1D(data) => Tensor1D(powArray(data)) 563 | case Tensor2D(data) => Tensor2D(powMatrix(data)) 564 | case Tensor4D(data) => Tensor4D(data.map(_.map(powMatrix))) 565 | case _ => notImplementedError(t :: Nil) 566 | 567 | def zero[T: ClassTag](t: Tensor[T])(using n: Numeric[T]): Tensor[T] = 568 | t match 569 | case Tensor0D(_) => Tensor0D(n.zero) 570 | case Tensor1D(data) => Tensor1D(Array.fill(data.length)(n.zero)) 571 | case t1 @ Tensor2D(_) => 572 | val (rows, cols) = t1.shape2D 573 | Tensor2D(Array.fill(rows, cols)(n.zero)) 574 | case t1 @ Tensor3D(_) => 575 | val (cubes, rows, cols) = t1.shape3D 576 | Tensor3D(Array.fill(cubes, rows, cols)(n.zero)) 577 | case t1 @ Tensor4D(_) => 578 | val (tensors, cubes, rows, cols) = t1.shape4D 579 | Tensor4D(Array.fill(tensors, cubes, rows, cols)(n.zero)) 580 | 581 | def col[T: ClassTag](data: Array[Array[T]], i: Int): Array[T] = 582 | val to = i + 1 583 | slice(data, None, Some(i, to)).flatMap(_.headOption) 584 | 585 | def slice[T: ClassTag]( 586 | data: Array[Array[T]], 587 | rows: Option[(Int, Int)] = None, 588 | cols: Option[(Int, Int)] = None 589 | ): Array[Array[T]] = 590 | (rows, cols) match 591 | case (Some((rowsFrom, rowsTo)), Some((colsFrom, colsTo))) => 592 | sliceArr(data, (rowsFrom, rowsTo)).map(a => 593 | sliceArr(a, (colsFrom, colsTo)) 594 | ) 595 | case (None, Some((colsFrom, colsTo))) => 596 | data.map(a => 
sliceArr(a, (colsFrom, colsTo))) 597 | case (Some((rowsFrom, rowsTo)), None) => 598 | sliceArr(data, (rowsFrom, rowsTo)) 599 | case _ => data 600 | 601 | def sliceArr[T: ClassTag]( 602 | data: Array[Array[T]], 603 | rows: (Int, Int), 604 | cols: (Int, Int) 605 | ): Array[Array[T]] = 606 | sliceArr(data, rows).map(a => 607 | sliceArr(a, cols) 608 | ) 609 | 610 | def sliceArr[T]( 611 | data: Array[T], 612 | range: (Int, Int) 613 | ): Array[T] = 614 | val (l, r) = range 615 | val from = if l < 0 then data.length + l else l 616 | val to = if r < 0 then data.length + r else if r == 0 then data.length else r 617 | data.slice(from, to) 618 | 619 | // returns max index per array 620 | // for 2D Tensor: returns an array of indices where every element is a max index for a specific row 621 | def argMax[T: ClassTag](t: Tensor[T])(using n: Numeric[T]) = 622 | def maxIndex(a: Array[T]) = 623 | n.fromInt(a.indices.maxBy(a)) 624 | 625 | t match 626 | case Tensor2D(data) => Tensor1D(data.map(maxIndex)) 627 | case Tensor1D(data) => Tensor0D(maxIndex(data)) 628 | case Tensor0D(_) => t 629 | case _ => notImplementedError(t :: Nil) 630 | 631 | def outer[T: ClassTag: Numeric](t1: Tensor[T], t2: Tensor[T]): Tensor[T] = 632 | def product(a: Array[T], b: Array[T]) = 633 | val res = Array.ofDim(a.length, b.length) 634 | for i <- 0 until a.length do 635 | for j <- 0 until b.length do 636 | res(i)(j) = a(i) * b(j) 637 | res 638 | 639 | (t1, t2) match 640 | case (Tensor0D(d), Tensor0D(d2)) => Tensor0D(d * d2) 641 | case (Tensor0D(d), _) => scalarMul(t2, d) 642 | case (Tensor1D(d), Tensor0D(d2)) => scalarMul(t1, d2) 643 | case (Tensor1D(d), Tensor1D(d2)) => Tensor2D(product(d, d2)) 644 | case (Tensor1D(d), Tensor2D(d2)) => Tensor2D(product(d, d2.flatten)) 645 | case (Tensor2D(d), Tensor0D(d2)) => scalarMul(t1, d2) 646 | case (Tensor2D(d), Tensor1D(d2)) => Tensor2D(product(d.flatten, d2)) 647 | case (Tensor2D(d), Tensor2D(d2)) => Tensor2D(product(d.flatten, d2.flatten)) 648 | case _ => notImplementedError(t1 :: t2 :: Nil) 649 | 650 | def flatten[T: ClassTag](t: Tensor[T]): Tensor[T] = 651 | t match 652 | case Tensor0D(_) => t 653 | case Tensor1D(_) => t 654 | case Tensor2D(data) => Tensor1D(data.flatten) 655 | case _ => notImplementedError(t :: Nil) 656 | 657 | def diag[T: ClassTag](t: Tensor[T])(using n: Numeric[T]): Tensor[T] = 658 | t match 659 | case Tensor0D(_) => t 660 | case Tensor1D(d) => 661 | val res = Array.ofDim(d.length, d.length) 662 | for i <- 0 until d.length do 663 | for j <- 0 until d.length do 664 | res(i)(j) = if i == j then d(i) else n.zero 665 | Tensor2D(res) 666 | case t2 @ Tensor2D(d) => 667 | val size = t2.shape.min 668 | val res = Array.ofDim(size) 669 | for i <- 0 until size do 670 | for j <- 0 until size if i == j do 671 | res(i) = d(i)(j) 672 | Tensor1D(res) 673 | case _ => notImplementedError(t :: Nil) 674 | 675 | def max[T: ClassTag: Numeric](t: Tensor[T]): T = 676 | t match 677 | case Tensor0D(d) => d 678 | case Tensor1D(d) => d.max 679 | case Tensor2D(d) => d.map(_.max).max 680 | case Tensor3D(d) => d.map(_.map(_.max).max).max 681 | case Tensor4D(d) => d.map(_.map(_.map(_.max).max).max).max 682 | 683 | def reshape[T: ClassTag: Numeric](t: Tensor[T], shape: List[Int]): Tensor[T] = 684 | shape match 685 | case cubes :: rows :: cols :: _ => t match 686 | case Tensor2D(data) => 687 | Tensor4D(data.flatMap(_.grouped(cols).toArray.grouped(rows).toArray.grouped(cubes).toArray)) 688 | case _ => t 689 | case _ => t 690 | 691 | 692 | 
--------------------------------------------------------------------------------
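Usage note (not part of the repository sources): the tensor extension methods defined in src/main/scala/ml/tensors/ops.scala can be exercised directly from a REPL or an Ammonite script such as plots.sc. A minimal sketch, using only operations visible above (`as2D`, `*` for the dot product, `|*|` for the Hadamard product, `+` with shape checking, `.T` for transpose); the numeric values are illustrative only:

import ml.tensors.api._
import ml.tensors.ops._

val a = Array(Array(1d, 2d), Array(3d, 4d)).as2D   // 2x2 matrix lifted to Tensor2D[Double]
val b = Array(Array(5d, 6d), Array(7d, 8d)).as2D

val dot      = a * b        // matrix multiplication (TensorOps.mul)
val hadamard = a |*| b      // element-wise product (TensorOps.multiply)
val sum      = a + b        // element-wise sum; shapes must match
val at       = a.T          // transpose
val batches  = a.batches(1) // iterator over row batches of size 1

println(dot.shape)          // List(2, 2)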
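In the same spirit, an OptimizerCfg can be assembled by hand and handed to a Sequential model via its withCfg method. The sketch below uses only the constructors defined in optimizers.scala above; the learning rate and clipping thresholds are arbitrary example values, not defaults taken from the repository:

import ml.network._

// Adam hyper-parameters with the library defaults (b1 = 0.9, b2 = 0.999, eps = 10E-8)
val adamCfg = AdamCfg.default[Double]

// optimizer configuration with value-based gradient clipping into [-5.0, 5.0]
val cfg = OptimizerCfg[Double](
  learningRate = 0.001,
  clip = GradientClippingApi.clipByValue(5.0),
  adam = adamCfg
)

// clipByNorm is the alternative: rescale a gradient tensor when its L2 norm exceeds the threshold
val byNorm: GradientClipping[Double] = GradientClippingApi.clipByNorm(1.0)

A model configured this way picks up the clipping function inside both the Adam and StandardGD updateWeights implementations, since each of them applies cfg.clip to the weight and bias gradients before scaling by the batch size and the learning rate.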