├── project ├── build.properties └── Dependencies.scala ├── .scalafmt.conf ├── .gitignore ├── src ├── test │ └── scala │ │ ├── examples │ │ └── LinearRegSmokeTest.scala │ │ └── ml │ │ └── network │ │ ├── MaxPoolTest.scala │ │ └── Conv2DTest.scala └── main │ └── scala │ ├── ml │ ├── math │ │ └── geneircMath.scala │ ├── network │ │ ├── api.scala │ │ ├── metrics.scala │ │ ├── initialization.scala │ │ ├── loss.scala │ │ ├── activators.scala │ │ ├── optimizers.scala │ │ ├── ann.scala │ │ └── layers.scala │ ├── preprocessing │ │ ├── TextLoader.scala │ │ └── encoders.scala │ ├── transformation.scala │ └── tensors │ │ ├── ndarray.scala │ │ ├── tensor.scala │ │ └── ops.scala │ └── examples │ ├── fileUtils.scala │ ├── mnistCommon.scala │ ├── CNN.scala │ ├── MNIST.scala │ ├── multipleRegression.scala │ ├── linearRegression.scala │ └── MnistLoader.scala ├── .github └── workflows │ └── scala.yml ├── README.md └── plots.sc /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.5.5 2 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.0.0-RC3" 2 | runner.dialect = scala3 3 | -------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | object Dependencies { 4 | lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.2.10" 5 | } 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ammonite 2 | .metals 3 | .vscode 4 | .dotty-* 5 | .idea 6 | target 7 | .bsp 8 | metals.sbt 9 | .bloop 10 | .DS_Store 11 | notebooks 12 | metrics 13 | images -------------------------------------------------------------------------------- /src/test/scala/examples/LinearRegSmokeTest.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import org.scalatest.flatspec.AnyFlatSpec 4 | 5 | class LinearRegSmokeTest extends AnyFlatSpec { 6 | it should "run linear regression example without a fail" in { 7 | lrTest() 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /.github/workflows/scala.yml: -------------------------------------------------------------------------------- 1 | name: Scala CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up JDK 11 17 | uses: actions/setup-java@v1 18 | with: 19 | java-version: 11.0.x 20 | - name: Run tests 21 | run: sbt test 22 | -------------------------------------------------------------------------------- /src/main/scala/ml/math/geneircMath.scala: -------------------------------------------------------------------------------- 1 | package ml.math 2 | 3 | import scala.reflect.ClassTag 4 | import ml.transformation.castFromTo 5 | 6 | object generic: 7 | def exp[T: ClassTag](v: T)(using n: Numeric[T]): T = 8 | castFromTo[Double, T](math.exp(n.toDouble(v))) 9 | 10 | def pow[T: ClassTag](x: T, y: T)(using n: Numeric[T]): T = 11 | castFromTo[Double, T](math.pow(n.toDouble(x), n.toDouble(y))) 12 | 13 | def max[T: ClassTag](x: T, y: T)(using 
n: Numeric[T]): T = 14 | castFromTo[Double, T](math.max(n.toDouble(x), n.toDouble(y))) 15 | 16 | def log[T: ClassTag](x: T)(using n: Numeric[T]): T = 17 | castFromTo[Double, T](math.log(n.toDouble(x))) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning in Scala from scratch 2 | 3 | An example mini-library implementation for Artificial Neural Network training and inference. 4 | 5 | __Implementation includes:__ 6 | 7 | 1. Mini-library for a subset of Tensor calculus 8 | 1. Mini-library for data preparation 9 | 1. A DSL for Neural Network creation, including layers 10 | 1. Pluggable weight optimizers and initializers 11 | 1. Pluggable implementations of activation and loss functions 12 | 1. Pluggable training metric calculation 13 | 14 | 15 | See examples: 16 | - [Classification with Artificial Neural Network](src/main/scala/examples/multipleRegression.scala) 17 | - [Linear Regression](src/main/scala/examples/linearRegression.scala) 18 | - [MNIST Images Classification](src/main/scala/examples/MNIST.scala) -------------------------------------------------------------------------------- /src/main/scala/examples/fileUtils.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.network.Model 4 | 5 | import scala.util.Using 6 | 7 | import java.io.File 8 | import java.io.PrintWriter 9 | import java.nio.file.Path 10 | 11 | def store(filename: String, header: String, data: List[List[String]]) = 12 | Using.resource(new PrintWriter(new File(filename))) { w => 13 | w.write(header) 14 | data.foreach { row => 15 | w.write(s"\n${row.mkString(",")}") 16 | } 17 | } 18 | 19 | def storeMetrics[T](model: Model[T], path: Path) = 20 | val values = model.metricValues 21 | val header = s"epoch,loss,${values.map(_._1.name).mkString(",")}" 22 | val acc = values.headOption.map(_._2).getOrElse(Nil) 23 | val lrData = model.history.losses.zip(acc).zipWithIndex.map { 24 | case ((loss, acc), epoch) => List(epoch.toString, loss.toString, acc.toString) 25 | } 26 | store(path.toString, header, lrData) -------------------------------------------------------------------------------- /src/main/scala/examples/mnistCommon.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.network.api._ 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | import ml.preprocessing._ 7 | 8 | import scala.reflect.ClassTag 9 | 10 | object mnistCommon: 11 | val imageDir = "images" 12 | 13 | def accuracyMnist[T: ClassTag: Ordering](using n: Numeric[T]) = new Metric[T]: 14 | val name = "accuracy" 15 | 16 | def matches(actual: Tensor[T], predicted: Tensor[T]): Int = 17 | val predictedArgMax = predicted.argMax 18 | actual.argMax.equalRows(predictedArgMax) 19 | 20 | def prepareData[T: ClassTag](x: Tensor[T], y: Tensor[T])(using n: Fractional[T]) = 21 | val encoder = OneHotEncoder( 22 | classes = (0 to 9).map(i => (n.fromInt(i), n.fromInt(i))).toMap 23 | ) 24 | val max = n.fromInt(255) 25 | val xData = x.map(v => n.div(v, max)) // normalize to [0,1] range 26 | val yData = encoder.transform(y.as1D) 27 | (xData, yData) -------------------------------------------------------------------------------- /src/main/scala/ml/network/api.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | object api: 4 | final type 
StandardGD = ml.network.StandardGD 5 | final type Adam = ml.network.Adam 6 | final type Stub = ml.network.Stub 7 | 8 | final type MetricValues[T] = ml.network.MetricValues[T] 9 | 10 | final type RandomUniform = ml.network.RandomUniform 11 | final type HeNormal = ml.network.HeNormal 12 | 13 | export ml.network.Dense 14 | export ml.network.Conv2D 15 | export ml.network.MaxPool 16 | export ml.network.Flatten2D 17 | export ml.network.Layer 18 | export ml.network.optimizers.given 19 | export ml.network.Optimizable 20 | export ml.network.Sequential 21 | export ml.network.Model 22 | export ml.network.GradientClippingApi.* 23 | export ml.network.GradientClippingApi 24 | export ml.network.GradientClipping 25 | export ml.network.MetricApi.* 26 | export ml.network.Metric 27 | export ml.network.LossApi.* 28 | export ml.network.ActivationFuncApi.* 29 | export ml.network.ParamsInitializer 30 | export ml.network.inits.given 31 | export ml.network.inits -------------------------------------------------------------------------------- /src/main/scala/ml/network/metrics.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | 6 | import scala.reflect.ClassTag 7 | 8 | trait Metric[T]: 9 | val name: String 10 | 11 | // number of matched predictions versus actual labels 12 | def matches( 13 | actual: Tensor[T], 14 | predicted: Tensor[T] 15 | ): Int 16 | 17 | def average(count: Int, matches: Int): Double = 18 | matches.toDouble / count 19 | 20 | def apply(actual: Tensor[T], predicted: Tensor[T]): Double = 21 | val correct = matches(actual, predicted) 22 | average(actual.length, correct) 23 | 24 | object MetricApi: 25 | def predictedToBinary[T](v: T)(using n: Numeric[T]): T = 26 | if n.toDouble(v) > 0.5 then n.one else n.zero 27 | 28 | def accuracyBinaryClassification[T: ClassTag: Fractional] = new Metric[T]: 29 | val name = "accuracy" 30 | 31 | def matches( 32 | actual: Tensor[T], 33 | predicted: Tensor[T] 34 | ): Int = 35 | val predictedBinary = predicted.map(predictedToBinary) 36 | actual.equalRows(predictedBinary) -------------------------------------------------------------------------------- /src/main/scala/ml/preprocessing/TextLoader.scala: -------------------------------------------------------------------------------- 1 | package ml.preprocessing 2 | 3 | import ml.transformation.{castTo, castArray} 4 | import ml.tensors.api._ 5 | import ml.tensors.ops.{T, col, slice} 6 | 7 | import java.io.File 8 | import java.nio.file.Path 9 | import scala.io.Source 10 | import scala.reflect.ClassTag 11 | import scala.util.Using 12 | 13 | object TextLoader: 14 | val defaultDelimiter: String = "," 15 | 16 | def apply(rows: String*): TextLoader = 17 | TextLoader(data = rows.toArray.map(_.split(defaultDelimiter).toArray)) 18 | 19 | case class TextLoader( 20 | path: Path = new File("data.csv").toPath, 21 | header: Boolean = true, 22 | delimiter: String = TextLoader.defaultDelimiter, 23 | data: Array[Array[String]] = Array.empty[Array[String]] 24 | ): 25 | 26 | def load(): TextLoader = copy( 27 | data = Using.resource(Source.fromFile(path.toFile)) { s => 28 | val lines = s.getLines() 29 | (if header && lines.nonEmpty then lines.toArray.tail else lines.toArray) 30 | .map(_.split(delimiter)) 31 | } 32 | ) 33 | 34 | def cols[T: ClassTag](from: Int, to: Int): Tensor2D[T] = 35 | castTo[T](data.slice(None, Some((from, to)))) 36 | 37 | def col[T: ClassTag](i: Int): Tensor1D[T] = 38 | val col = data.col(i) 39 | 
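// Illustrative note (not part of the original source): castArray below parses each string
// cell according to the runtime class of T, so e.g. TextLoader("1.5,a", "2.5,b").col[Double](0)
// would yield Tensor1D(1.5, 2.5), while col[String](1) keeps the raw string values.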
Tensor1D(castArray[T](col)) 40 | 41 | def cols[T: ClassTag](i: Int): Tensor[T] = col(i).T -------------------------------------------------------------------------------- /src/main/scala/ml/network/initialization.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops.as4D 6 | 7 | import scala.reflect.ClassTag 8 | import java.util.Random 9 | 10 | trait ParamsInitializer[A, B]: 11 | 12 | def weights(rows: Int, cols: Int): Tensor2D[A] 13 | 14 | def biases(length: Int): Tensor1D[A] 15 | 16 | def weights4D( 17 | shape: List[Int] 18 | )(using c: ClassTag[A], n: Numeric[A]): Tensor4D[A] = 19 | val tensors :: cubes :: rows :: cols :: Nil = shape 20 | (0 until tensors) 21 | .map(_ => (0 until cubes).toArray.map(_ => weights(rows, cols))) 22 | .toArray 23 | .as4D 24 | 25 | // support Initializers 26 | type RandomUniform 27 | type HeNormal 28 | 29 | object inits: 30 | def zeros[T: ClassTag](length: Int)(using n: Numeric[T]): Tensor1D[T] = 31 | Tensor1D(Array.fill(length)(n.zero)) 32 | 33 | given [T: Numeric: ClassTag]: ParamsInitializer[T, RandomUniform] with 34 | 35 | def gen: T = 36 | castFromTo[Double, T](math.random().toDouble + 0.001d) 37 | 38 | override def weights(rows: Int, cols: Int): Tensor2D[T] = 39 | Tensor2D(Array.fill(rows)(Array.fill[T](cols)(gen))) 40 | 41 | override def biases(length: Int): Tensor1D[T] = 42 | zeros(length) 43 | 44 | given [T: ClassTag: Numeric]: ParamsInitializer[T, HeNormal] with 45 | val rnd = new Random() 46 | 47 | def gen(lenght: Int): T = 48 | castFromTo[Double, T] { 49 | val v = rnd.nextGaussian + 0.001d 50 | v * math.sqrt(2d / lenght.toDouble) 51 | } 52 | 53 | override def weights(rows: Int, cols: Int): Tensor2D[T] = 54 | Tensor2D(Array.fill(rows)(Array.fill[T](cols)(gen(rows)))) 55 | 56 | override def biases(length: Int): Tensor1D[T] = 57 | zeros(length) 58 | -------------------------------------------------------------------------------- /src/main/scala/examples/CNN.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import mnistCommon._ 4 | import ml.transformation.castFromTo 5 | import ml.tensors.api._ 6 | import ml.tensors.ops._ 7 | import ml.network.api._ 8 | import ml.network.api.given 9 | import ml.network.api.inits.given 10 | import ml.preprocessing._ 11 | 12 | import java.nio.file.Path 13 | import java.util.Random 14 | import scala.reflect.ClassTag 15 | 16 | @main 17 | def CNN() = 18 | type Precision = Float 19 | val accuracy = accuracyMnist[Precision] 20 | 21 | def clipByNorm[T: Fractional: ClassTag](norm: T) = new GradientClipping[T]: 22 | def apply(t: Tensor[T]) = 23 | t match 24 | case (Tensor4D(data)) => 25 | data.map(_.map(_.as2D.clipByNorm(norm).as2D)).as4D // clipping within matrix 26 | case _ => 27 | t.clipByNorm(norm) 28 | 29 | val cnn = Sequential[Precision, Adam, HeNormal]( 30 | crossEntropy, 31 | learningRate = 0.0015, 32 | metrics = List(accuracy), 33 | batchSize = 128, 34 | gradientClipping = clipByNorm(10.0), 35 | printStepTps = true 36 | ) 37 | .add(Conv2D(relu, 8, kernel = (5, 5))) 38 | .add(MaxPool(strides = (2, 2), window = (4, 4), padding = false)) 39 | .add(Flatten2D()) 40 | .add(Dense(relu, 64)) 41 | .add(Dense(softmax, 10)) 42 | 43 | val dataset = MnistLoader.loadData[Precision](imageDir, flat = false) 44 | val (xTrain, yTrain) = prepareData(dataset.trainImage, dataset.trainLabels) 45 | 46 | val start = 
System.currentTimeMillis() 47 | val model = cnn.train(xTrain, yTrain, epochs = 5, shuffle = true) 48 | println(s"training time: ${(System.currentTimeMillis() - start) / 1000f} in sec") 49 | 50 | val (xTest, yTest) = prepareData(dataset.testImages, dataset.testLabels) 51 | val testPredicted = model(xTest) 52 | val value = accuracy(yTest, testPredicted) 53 | println(s"test accuracy = $value") -------------------------------------------------------------------------------- /src/main/scala/examples/MNIST.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import mnistCommon._ 4 | import ml.transformation.{castTo, castFromTo} 5 | import ml.tensors.api._ 6 | import ml.tensors.ops._ 7 | import ml.network.api._ 8 | import ml.network.api.given 9 | import ml.network.inits.given 10 | import ml.preprocessing._ 11 | 12 | import java.nio.file.Path 13 | import scala.reflect.ClassTag 14 | 15 | @main def MNIST() = 16 | type Precision = Float 17 | val dataset = MnistLoader.loadData[Precision](imageDir) 18 | val accuracy = accuracyMnist[Precision] 19 | 20 | val ann = Sequential[Precision, Adam, HeNormal]( 21 | crossEntropy, 22 | learningRate = 0.001, 23 | metrics = List(accuracy), 24 | batchSize = 128, 25 | gradientClipping = clipByValue(5.0), 26 | printStepTps = true 27 | ) 28 | .add(Dense(relu, 50)) 29 | .add(Dense(softmax, 10)) 30 | 31 | val (xTrain, yTrain) = prepareData(dataset.trainImage, dataset.trainLabels) 32 | val start = System.currentTimeMillis() 33 | val model = ann.train(xTrain, yTrain, epochs = 15, shuffle = true) 34 | println(s"training time: ${(System.currentTimeMillis() - start) / 1000f} in sec") 35 | 36 | val (xTest, yTest) = prepareData(dataset.testImages, dataset.testLabels) 37 | val testPredicted = model(xTest) 38 | val value = accuracy(yTest, testPredicted) 39 | println(s"test accuracy = $value") 40 | 41 | // Single Test 42 | val singleTestImage = dataset.testImages.as2D.data.head 43 | val imageMap = singleTestImage.grouped(28) 44 | .map(_.map(s => f"${s.toInt}%4s").mkString).mkString("\n") 45 | println(imageMap) 46 | val label = dataset.testLabels.as1D.data.head 47 | val predicted = model(singleTestImage.as2D).argMax.as0D.data 48 | println(s"predicted = $predicted") 49 | 50 | assert(label == predicted, 51 | s"Predicted label is not equal to expected '$label' label, but was '$predicted'") 52 | 53 | storeMetrics(model, Path.of("metrics/mnist.csv")) -------------------------------------------------------------------------------- /src/main/scala/ml/transformation.scala: -------------------------------------------------------------------------------- 1 | package ml 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | import TypeHelper._ 6 | import scala.reflect.ClassTag 7 | 8 | object TypeHelper: 9 | val String_ = classOf[String] 10 | val Int_ = classOf[Int] 11 | val Long_ = classOf[Long] 12 | val Float_ = classOf[Float] 13 | val Double_ = classOf[Double] 14 | 15 | // set of functions to parse and cast in the same time 16 | object transformation: 17 | 18 | def castTo[T: ClassTag]( 19 | data: Array[Array[String]] 20 | ): Tensor2D[T] = 21 | val transformed = data.map(castArray[T]) 22 | Tensor2D[T](transformed) 23 | 24 | def castArray[T: ClassTag](data: Array[String]): Array[T] = 25 | summon[ClassTag[T]].runtimeClass match 26 | case Float_ => data.map(_.toFloat.asInstanceOf[T]) 27 | case String_ => data.map(_.asInstanceOf[T]) 28 | case Double_ => data.map(_.toDouble.asInstanceOf[T]) 29 | 30 | private def 
castFromIntTo[T: ClassTag](data: Int): T = 31 | summon[ClassTag[T]].runtimeClass match 32 | case Float_ => data.toFloat.asInstanceOf[T] 33 | case String_ => data.toString.asInstanceOf[T] 34 | case Double_ => data.toDouble.asInstanceOf[T] 35 | case Int_ => data.asInstanceOf[T] 36 | 37 | def castFromTo[A, B](a: A)(using ev1: ClassTag[A], ev2: ClassTag[B]): B = 38 | (ev1.runtimeClass, ev2.runtimeClass) match 39 | case (Float_, String_) => a.toString.asInstanceOf[B] 40 | case (Float_, Double_) => a.asInstanceOf[Float].toDouble.asInstanceOf[B] 41 | case (Float_, Float_) => a.asInstanceOf[B] 42 | case (String_, Float_) => a.toString.toFloat.asInstanceOf[B] 43 | case (String_, Double_) => a.toString.toDouble.asInstanceOf[B] 44 | case (Double_, String_) => a.toString.asInstanceOf[B] 45 | case (Double_, Float_) => a.asInstanceOf[Double].toFloat.asInstanceOf[B] 46 | case (Double_, Double_) => a.asInstanceOf[B] 47 | case (Int_, _) => castFromIntTo[B](a.asInstanceOf[Int]) -------------------------------------------------------------------------------- /src/main/scala/ml/tensors/ndarray.scala: -------------------------------------------------------------------------------- 1 | package ml.tensors.api 2 | 3 | import scala.reflect.ClassTag 4 | 5 | case class NDArray[T: ClassTag: Numeric](data: Array[Any], shape: List[Int]): 6 | private def printArray(a: Array[Any], level: Int = 1): Array[String] = 7 | a.map { e => 8 | e match 9 | case ar: Array[Any] => 10 | val start = s"\n${" " * level}[" 11 | val body = printArray(ar, level + 1).mkString(",") 12 | val end = if body.last == ']' then s"\n${" " * level}]" else "]" 13 | s"$start$body$end" 14 | case _ => s"$e" 15 | } 16 | 17 | override def toString: String = 18 | val str = printArray(data).mkString(", ") 19 | "[" + str + (if str.last == ']' then "\n" else "") + "]" 20 | 21 | 22 | object NDArray: 23 | def init[T](shape: List[Int], v: T)(using n: Numeric[T]): Array[Any] = 24 | shape match 25 | case Nil => Array(v) 26 | case h :: Nil => Array.fill(h)(v) 27 | case h :: t => Array.fill(h)(init(t, v)) 28 | 29 | def zeros[T: ClassTag](shape: Int*)(using n: Numeric[T]): NDArray[T] = 30 | NDArray[T](init(shape.toList, n.zero), shape.toList) 31 | 32 | def ones[T: ClassTag](shape: Int*)(using n: Numeric[T]): NDArray[T] = 33 | NDArray[T](init(shape.toList, n.one), shape.toList) 34 | 35 | extension [T: ClassTag: Numeric](a: NDArray[T]) 36 | protected def reshape(shape: Int*): NDArray[T] = 37 | val newShape = shape.toList 38 | assert(a.shape.reduce(_ * _) == newShape.reduce(_ * _), s"Current shape ${a.shape} does not fit new shape = $shape") 39 | 40 | @annotation.tailrec 41 | def group(ar: Array[Any], shape: List[Int]): Array[Any] = 42 | shape match 43 | case h :: Nil => ar.grouped(h).toArray 44 | case h :: t => group(ar.grouped(h).toArray, t) 45 | case _ => ar 46 | 47 | NDArray[T](group(a.data, newShape.reverse), newShape) 48 | 49 | 50 | @main 51 | def test = 52 | val ones = NDArray.ones[Int](16) 53 | println(ones) 54 | println(ones.reshape(2, 2, 2, 2)) 55 | 56 | NDArray[Int](Array(Array(Array(1))), List(1,1,1)) -------------------------------------------------------------------------------- /src/main/scala/ml/network/loss.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | import ml.math.generic._ 7 | 8 | import scala.math.Numeric.Implicits._ 9 | import scala.reflect.ClassTag 10 | 11 | trait Loss[T]: 12 | def 
apply( 13 | actual: Tensor[T], 14 | predicted: Tensor[T] 15 | ): T 16 | 17 | object LossApi: 18 | private def calcMetric[T: Numeric: ClassTag]( 19 | t1: Tensor[T], t2: Tensor[T], f: (T, T) => T 20 | ) = 21 | (t1, t2) match 22 | case (Tensor1D(a), Tensor1D(b)) => 23 | val sum = (t1, t2).map2(f).sum //TODO: sum and then apply f ? 24 | (sum, t1.length) 25 | case (t @ Tensor2D(a), Tensor2D(b)) => 26 | val (rows, cols) = t.shape2D 27 | val sum = (t1, t2).map2(f).sum //TODO: sum and then apply f ? 28 | (sum, rows * cols) 29 | case (Tensor0D(a), Tensor0D(b)) => 30 | (f(a, b), 1) 31 | case _ => 32 | sys.error(s"Both tensors must be the same shape: ${t1.shape} != ${t2.shape}") 33 | 34 | private def mean[T: Numeric: ClassTag](count: Int, sum: T): Double = 35 | castFromTo[T, Double](sum) / count 36 | 37 | def meanSquareError[T: ClassTag](using n: Numeric[T]) = new Loss[T]: 38 | def calc(a: T, b: T): T = 39 | pow(a - b, n.fromInt(2)) 40 | 41 | override def apply( 42 | actual: Tensor[T], 43 | predicted: Tensor[T] 44 | ): T = 45 | val (sumScore, count) = calcMetric(actual, predicted, calc) 46 | val meanSumScore = mean(count, sumScore) 47 | castFromTo(meanSumScore) 48 | 49 | def crossEntropy[T: ClassTag: Numeric] = new Loss[T]: 50 | def calc(y: T, yHat: T): T = 51 | y * log(yHat) 52 | 53 | override def apply( 54 | actual: Tensor[T], 55 | predicted: Tensor[T] 56 | ): T = 57 | val (sumScore, count) = calcMetric(actual, predicted, calc) 58 | val meanSumScore = mean(count, sumScore) 59 | castFromTo(-meanSumScore) 60 | 61 | def binaryCrossEntropy[T: ClassTag](using n: Numeric[T]) = new Loss[T]: 62 | def calc(y: T, yHat: T): T = 63 | y * log(yHat) + (n.one - y) * log(n.one - yHat) 64 | 65 | override def apply( 66 | actual: Tensor[T], 67 | predicted: Tensor[T] 68 | ): T = 69 | val (sumScore, count) = calcMetric(actual, predicted, calc) 70 | val meanSumScore = mean(count, sumScore) 71 | castFromTo(-meanSumScore) -------------------------------------------------------------------------------- /src/main/scala/examples/multipleRegression.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.preprocessing._ 4 | import ml.transformation.castTo 5 | import ml.tensors.api._ 6 | import ml.tensors.ops._ 7 | import ml.network.api._ 8 | import ml.network.api.given 9 | import ml.network.inits.given 10 | 11 | import java.nio.file.Path 12 | import java.io.{File, PrintWriter} 13 | import scala.reflect.ClassTag 14 | import scala.util.Using 15 | 16 | @main def multipleRegression() = 17 | 18 | def createEncoders[T: Numeric: ClassTag]( 19 | data: Tensor2D[String] 20 | ): Tensor2D[String] => Tensor2D[T] = 21 | val hotEncoder = OneHotEncoder[String, T]().fit(data.col(1)) // geography 22 | val encoder = LabelEncoder[String]().fit(data.col(2)) // gender 23 | 24 | val hot = t => hotEncoder.transform(t, 1) 25 | val label = t => encoder.transform(t, 2) 26 | val typeTransform = (t: Tensor2D[String]) => castTo[T](t.data) 27 | 28 | label andThen hot andThen typeTransform 29 | 30 | val accuracy = accuracyBinaryClassification[Double] 31 | 32 | val ann = Sequential[Double, Adam, RandomUniform]( 33 | binaryCrossEntropy, 34 | learningRate = 0.002d, 35 | metrics = List(accuracy), 36 | batchSize = 64, 37 | gradientClipping = clipByValue(5.0d) 38 | ) 39 | .add(Dense(leakyRelu, 6)) 40 | .add(Dense(leakyRelu, 6)) 41 | .add(Dense(sigmoid)) 42 | 43 | val dataLoader = TextLoader(Path.of("data", "Churn_Modelling.csv")).load() 44 | val data = dataLoader.cols[String](3, -1) 45 | 46 | 
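// What the composed encoder does to one raw row (illustrative values, assuming the
// geography classes France/Germany/Spain and gender labels Female=0, Male=1 after sorting):
//   ["600", "France", "Male", "40", ...]        raw columns from the CSV slice
//   ["600", "France", "1", "40", ...]           gender label-encoded (column 2)
//   ["600", "1", "0", "0", "1", "40", ...]      geography one-hot encoded in place (column 1)
//   [600.0, 1.0, 0.0, 0.0, 1.0, 40.0, ...]      every cell cast to Double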
val encoders = createEncoders[Double](data) 47 | val numericData = encoders(data) 48 | val scaler = StandardScaler[Double]().fit(numericData) 49 | 50 | val prepareData = (t: Tensor2D[String]) => { 51 | val numericData = encoders(t) 52 | scaler.transform(numericData) 53 | } 54 | 55 | val x = prepareData(data) 56 | val y = dataLoader.cols[Double](-1) 57 | 58 | val ((xTrain, xTest), (yTrain, yTest)) = (x, y).split(0.2f) 59 | 60 | val start = System.currentTimeMillis() 61 | val model = ann.train(xTrain, yTrain, epochs = 100) 62 | println(s"training time: ${(System.currentTimeMillis() - start) / 1000f} in sec") 63 | 64 | // Single test 65 | val example = TextLoader( 66 | "n/a,n/a,n/a,600,France,Male,40,3,60000,2,1,1,50000,n/a" 67 | ).cols[String](3, -1) 68 | val testExample = prepareData(example) 69 | val yHat = model(testExample) 70 | val exited = predictedToBinary(yHat.as0D.data) == 1d 71 | println(s"Exited customer? $exited") 72 | 73 | // Test Dataset 74 | val testPredicted = model(xTest) 75 | val value = accuracy(yTest, testPredicted) 76 | println(s"test accuracy = $value") 77 | 78 | storeMetrics(model, Path.of("metrics/ann.csv")) -------------------------------------------------------------------------------- /src/test/scala/ml/network/MaxPoolTest.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | 6 | import scala.reflect.ClassTag 7 | import scala.math.Numeric.Implicits._ 8 | 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | import org.scalatest.matchers.should.Matchers 11 | 12 | class MaxPoolTest extends AnyFlatSpec with Matchers { 13 | val image = Tensor4D( 14 | Array( 15 | Array( 16 | Array( 17 | Array(1d, 2, 3, 3), 18 | Array(2d, 3, 4, 3), 19 | Array(5d, 6, 7, 3) 20 | ) 21 | ) 22 | ) 23 | ) 24 | 25 | it should "do forward and backward propagation without padding" in { 26 | val unpadded = Array( 27 | Array( 28 | Array( 29 | Array(3d, 4, 4), 30 | Array(6d, 7, 7) 31 | ) 32 | ) 33 | ) 34 | 35 | // FORWARD 36 | // given 37 | val noPaddingLayer = MaxPool[Double](padding = false).init(image.shape) 38 | // when 39 | val noPaddingAct = noPaddingLayer(image) 40 | // then 41 | val z = noPaddingAct.z.as4D 42 | z.shape should ===(noPaddingLayer.shape) 43 | z.data should ===(unpadded) 44 | 45 | val unpaddedDelta = Array( 46 | Array( 47 | Array( 48 | Array(1d, 2, 3), 49 | Array(7d, 1, 2) 50 | ) 51 | ) 52 | ) 53 | 54 | val Gradient(unpaddedNextDelta, _, _) = 55 | noPaddingLayer.backward(noPaddingAct, unpaddedDelta.as4D, None) 56 | 57 | unpaddedNextDelta.as4D.data should ===( 58 | Array( 59 | Array( 60 | Array( 61 | Array(0d, 0, 0, 0), 62 | Array(0d, 1, 3, 0), 63 | Array(0d, 7, 2, 0) 64 | ) 65 | ) 66 | ) 67 | ) 68 | } 69 | 70 | it should "do forward propagation with padding" in { 71 | // given 72 | val padded = Array( 73 | Array( 74 | Array( 75 | Array(3d, 4, 4, 3), 76 | Array(6d, 7, 7, 3), 77 | Array(6d, 7, 7, 3) 78 | ) 79 | ) 80 | ) 81 | val paddedLayer = MaxPool[Double](padding = true).init(image.shape) 82 | // when 83 | val a = paddedLayer(image) 84 | 85 | // then 86 | a.z.shape should ===(paddedLayer.shape) 87 | a.z.as4D.data should ===(padded) 88 | 89 | // BACKWARD 90 | // given 91 | val delta = Array( 92 | Array( 93 | Array( 94 | Array(1d, 2, 3, 1), 95 | Array(7d, 1, 2, 1), 96 | Array(1d, 1, 2, 1) 97 | ) 98 | ) 99 | ) 100 | // when 101 | val Gradient(nextDelta, w, b) = paddedLayer.backward(a, delta.as4D, None) 102 | 103 | //then 104 | nextDelta.as4D.shape4D should 
===(a.x.as4D.shape4D) 105 | w should ===(None) 106 | b should ===(None) 107 | 108 | withClue(s"$nextDelta") { 109 | nextDelta.as4D.data should ===( 110 | Array( 111 | Array( 112 | Array( 113 | Array(0d, 0, 0, 1), 114 | Array(0d, 1, 3, 1), 115 | Array(0d, 1, 2, 1) 116 | ) 117 | ) 118 | ) 119 | ) 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/main/scala/ml/network/activators.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | import ml.math.generic._ 7 | 8 | import math.Ordering.Implicits.infixOrderingOps 9 | import math.Fractional.Implicits.infixFractionalOps 10 | import scala.reflect.ClassTag 11 | 12 | trait ActivationFunc[T]: 13 | val name: String 14 | def apply(x: Tensor[T]): Tensor[T] 15 | def derivative(x: Tensor[T]): Tensor[T] 16 | 17 | object ActivationFuncApi: 18 | def relu[T: ClassTag](using n: Numeric[T]) = new ActivationFunc[T]: 19 | 20 | override def apply(x: Tensor[T]): Tensor[T] = 21 | x.map(t => if t < n.zero then n.zero else t) 22 | 23 | override def derivative(x: Tensor[T]): Tensor[T] = 24 | x.map(t => if t < n.zero then n.zero else n.one) 25 | 26 | override val name = "relu" 27 | 28 | def leakyRelu[T: ClassTag](using n: Numeric[T]) = new ActivationFunc[T]: 29 | val scaler = castFromTo[Double, T](0.01) 30 | 31 | override def apply(x: Tensor[T]): Tensor[T] = 32 | x.map(t => if t < n.zero then n.times(scaler, t) else t) 33 | 34 | override def derivative(x: Tensor[T]): Tensor[T] = 35 | x.map(t => if t < n.zero then scaler else n.one) 36 | 37 | override val name = "leakyRelu" 38 | 39 | def sigmoid[T: ClassTag](using n: Fractional[T]) = new ActivationFunc[T]: 40 | 41 | override def apply(x: Tensor[T]): Tensor[T] = 42 | x.map(t => n.one / (n.one + exp(-t))) 43 | 44 | override def derivative(x: Tensor[T]): Tensor[T] = 45 | x.map(t => exp(-t) / pow(n.one + exp(-t), n.fromInt(2))) 46 | 47 | override val name = "sigmoid" 48 | 49 | def softmax[T: ClassTag: Ordering](using n: Fractional[T]) = new ActivationFunc[T]: 50 | val toleration = castFromTo[Double, T](0.9E-6d) 51 | 52 | override def apply(x: Tensor[T]): Tensor[T] = 53 | val applied = x.mapRow { row => 54 | val max = row.max 55 | val expNorm = row.map(v => exp(v - max)) 56 | val sum = expNorm.sum 57 | expNorm.map(_ / sum) 58 | } 59 | 60 | val appliedSum = applied.sumCols.map( 61 | v => 62 | if v.abs - toleration > n.one 63 | then v 64 | else n.one 65 | ) 66 | val totalSum = appliedSum.sumRows.as1D.data.head 67 | assert(totalSum == x.length, 68 | s"Softmax distribution sum is not equal to 1 at some activation, but\n${appliedSum}") 69 | applied 70 | 71 | override def derivative(x: Tensor[T]): Tensor[T] = 72 | val sm = apply(x) 73 | sm.multiply(n.one - sm) 74 | 75 | // override def derivative(x: Tensor[T]): Tensor[T] = 76 | // println(s"derivative x:\n$x") 77 | // val sm = apply(x) 78 | // sm.mapRow { row => 79 | // val t = Tensor1D(row) 80 | // val dxDs = t.diag - (t * t) 81 | // dxDs.sumRows.as1D.data 82 | // } 83 | 84 | override val name = "softmax" 85 | 86 | def linear[T] = new ActivationFunc[T]: 87 | override def apply(x: Tensor[T]): Tensor[T] = x 88 | override def derivative(x: Tensor[T]): Tensor[T] = x 89 | override val name = "linear" -------------------------------------------------------------------------------- /src/main/scala/examples/linearRegression.scala: 
-------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.network.api._ 4 | import ml.network.api.given 5 | import ml.tensors.api._ 6 | import ml.tensors.ops._ 7 | import ml.network.inits.given 8 | 9 | import scala.reflect.ClassTag 10 | import scala.math.Numeric.Implicits._ 11 | import scala.collection.mutable.ArrayBuffer 12 | import scala.util.{Random, Using} 13 | import scala.collection.parallel.CollectionConverters._ 14 | 15 | import java.io.{File,PrintWriter} 16 | 17 | @main 18 | def linearRegression() = lrTest(false) 19 | 20 | def lrTest(fromTest: Boolean = true) = 21 | type Precision = Float 22 | val random = new Random() 23 | val weight = random.nextFloat() 24 | val bias = random.nextFloat() 25 | 26 | def batch(batchSize: Int): (ArrayBuffer[Precision], ArrayBuffer[Precision]) = 27 | val inputs = ArrayBuffer.empty[Precision] 28 | val outputs = ArrayBuffer.empty[Precision] 29 | def noise = 0//random.nextFloat / 5 30 | (0 until batchSize).foldLeft(inputs, outputs) { case ((x, y), _) => 31 | val rnd = random.nextFloat 32 | x += rnd + noise 33 | y += bias + weight * rnd + noise 34 | (x, y) 35 | } 36 | 37 | val optimizer = "adam" 38 | 39 | val ann = Sequential[Precision, Adam, RandomUniform]( 40 | meanSquareError, 41 | learningRate = 0.001, 42 | batchSize = 16, 43 | gradientClipping = _.clipByNorm(10.0) 44 | ).add(Dense()) 45 | 46 | val (xBatch, yBatch) = batch(10000) 47 | val x = Tensor1D(xBatch.toArray) 48 | val y = Tensor1D(yBatch.toArray) 49 | val ((xTrain, xTest), (yTrain, yTest)) = (x, y).split(0.2) 50 | 51 | val model = ann.train(xTrain.T, yTrain.T, epochs = 100) 52 | 53 | println(s"current weight: ${model.layers}") 54 | println(s"true weight: $weight") 55 | println(s"true bias: $bias") 56 | 57 | // Test Dataset 58 | val testPredicted = model(xTest.T) 59 | val value = meanSquareError[Precision].apply(yTest.T, testPredicted) 60 | println(s"test meanSquareError = $value") 61 | 62 | if !fromTest then 63 | ////////////////////////////////////////// 64 | // Store all posible data for plotting /// 65 | ////////////////////////////////////////// 66 | 67 | // datapoints 68 | val dataPoints = xBatch.zip(yBatch).map((x, y) => List(x.toString, y.toString)) 69 | store("metrics/datapoints.csv", "x,y", dataPoints.toList) 70 | 71 | //Store loss metric into CSV file 72 | val lossData = model.history.losses.zipWithIndex.map((l,i) => List(i.toString, l.toString)) 73 | store("metrics/lr.csv", "epoch,loss", lossData) 74 | 75 | //gradient 76 | val gradientData = model.history.layers.zip(model.history.losses) 77 | .map { (layers, loss) => 78 | layers.headOption.collect { 79 | case l @ Dense(_, _, _ , Some(w), Some(b), _) => 80 | List(w.as1D.data.head.toString, b.as1D.data.head.toString) 81 | case _ => Nil 82 | }.toList.flatten :+ loss.toString 83 | } 84 | 85 | store(s"metrics/$optimizer-gradient.csv", "w,b,loss", gradientData) 86 | 87 | // loss surface 88 | val weights = for (i <- 0 until 100) yield i/100.0f 89 | val biases = weights 90 | 91 | println("Calculating loss surface") 92 | val losses = weights.par.map { w => 93 | val wT = w.as2D 94 | biases.foldLeft(ArrayBuffer.empty[Precision]) { (acc, b) => 95 | val loss = ann.loss(x.T, y.T, List(Dense(w = Some(wT), b = Some(b.as1D)))) 96 | acc :+ loss 97 | } 98 | } 99 | println("Done calculating loss surface.") 100 | 101 | val metricsData = weights.zip(biases).zip(losses) 102 | .map { case ((w, b), l) => List(w.toString, b.toString, l.mkString("\"", ",", "\"")) } 103 | 104 | 
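// Each row written below has the form w,b,"l_0,l_1,...,l_99": the third column keeps the
// whole loss row across all bias values as a single quoted, comma-separated field, which
// plots.sc later re-splits with `_.split(",")` to rebuild the loss-surface grid.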
store(s"metrics/$optimizer-lr-surface.csv", "w,b,l", metricsData.toList) -------------------------------------------------------------------------------- /src/main/scala/ml/preprocessing/encoders.scala: -------------------------------------------------------------------------------- 1 | package ml.preprocessing 2 | 3 | import Encoder._ 4 | import ml.transformation.castFromTo 5 | import ml.tensors.api._ 6 | import ml.tensors.ops.T 7 | 8 | import scala.collection.mutable.ArrayBuffer 9 | import scala.reflect.ClassTag 10 | 11 | object Encoder: 12 | def toClasses[T: ClassTag: Ordering, U: ClassTag]( 13 | samples: Tensor1D[T] 14 | ): Map[T, U] = 15 | samples.data.distinct.sorted.zipWithIndex.toMap.view 16 | .mapValues(castFromTo[Int, U]) 17 | .toMap 18 | 19 | case class LabelEncoder[T: ClassTag: Ordering]( 20 | classes: Map[T, T] = Map.empty[T, T] 21 | ): 22 | def fit(samples: Tensor1D[T]): LabelEncoder[T] = 23 | LabelEncoder(toClasses[T, T](samples)) 24 | 25 | def transform(t: Tensor2D[T], col: Int): Tensor2D[T] = 26 | val data = t.data.map( 27 | _.zipWithIndex.map { (d, i) => 28 | if i == col then classes.getOrElse(d, d) else d 29 | } 30 | ) 31 | Tensor2D(data) 32 | 33 | /** 34 | * T - key type 35 | * U - numeric value type for the key type 36 | */ 37 | case class OneHotEncoder[ 38 | T: Ordering: ClassTag, 39 | U: Ordering: ClassTag 40 | ]( 41 | classes: Map[T, U] = Map.empty[T, U], 42 | notFound: Int = -1 43 | )(using n: Numeric[U]): 44 | def fit(samples: Tensor1D[T]): OneHotEncoder[T, U] = 45 | OneHotEncoder[T, U](toClasses[T, U](samples)) 46 | 47 | def transform(t: Tensor1D[T]): Tensor2D[T] = 48 | Tensor2D(t.data.map(encode)) 49 | 50 | private def encode(v: T) = 51 | val zero = castFromTo[Int, T](0) 52 | val array = Array.fill[T](classes.size)(zero) 53 | val pos = classes.get(v) 54 | pos match 55 | case Some(p) => 56 | array(n.toInt(p)) = castFromTo[U, T](n.one) 57 | case None => 58 | array(0) = castFromTo[U, T](n.fromInt(notFound)) 59 | array 60 | 61 | def transform(t: Tensor2D[T], col: Int): Tensor2D[T] = 62 | val data = t.data.map { row => 63 | row.zipWithIndex 64 | .foldLeft(ArrayBuffer.empty[T]) { case (acc, (v, i)) => 65 | if i == col then acc ++ encode(v) 66 | else acc :+ v 67 | } 68 | .toArray[T] 69 | } 70 | Tensor2D(data) 71 | 72 | case class ColumnStat(mean: Double, stdDev: Double) 73 | 74 | case class StandardScaler[T: Numeric: ClassTag]( 75 | stats: Array[ColumnStat] = Array.empty 76 | ): 77 | def fit(samples: Tensor[T]): StandardScaler[T] = 78 | samples match 79 | case Tensor1D(data) => 80 | StandardScaler(Array(fitColumn(data))) 81 | case t @ Tensor2D(_) => 82 | StandardScaler(t.T.data.map(fitColumn)) 83 | case Tensor0D(_) => StandardScaler() 84 | case _ => 85 | sys.error(s"Not implemented for: $samples") 86 | 87 | private def fitColumn(data: Array[T]) = 88 | val nums = data.map(castFromTo[T, Double]) 89 | val mean = nums.sum / data.length 90 | val stdDev = math.sqrt( 91 | nums.map(n => math.pow(n - mean, 2)).sum / (data.length - 1) 92 | ) 93 | ColumnStat(mean, stdDev) 94 | 95 | def transform(t: Tensor[T]): Tensor[T] = 96 | t match 97 | case Tensor1D(data) => 98 | val stat = stats.headOption.getOrElse( 99 | sys.error(s"There is no statistics for $t") 100 | ) 101 | val res = data.map(n => 102 | castFromTo[Double, T](scale(castFromTo[T, Double](n), stat)) 103 | ) 104 | Tensor1D(res) 105 | case t2 @ Tensor2D(data) => 106 | val (rows, cols) = t2.shape2D 107 | val res = Array.ofDim[T](rows, cols) 108 | 109 | for i <- 0 until rows do 110 | for j <- 0 until cols do 111 | val stat = 
stats(j) 112 | val n = castFromTo[T, Double](data(i)(j)) 113 | res(i)(j) = castFromTo[Double, T](scale(n, stat)) 114 | Tensor2D(res) 115 | case Tensor0D(_) => t // scaling is not applicable for scalar tensor 116 | case _ => sys.error(s"Not implemented for: $t") 117 | 118 | private def scale(n: Double, stat: ColumnStat): Double = 119 | (n - stat.mean) / stat.stdDev 120 | -------------------------------------------------------------------------------- /src/main/scala/ml/tensors/tensor.scala: -------------------------------------------------------------------------------- 1 | package ml.tensors.api 2 | 3 | import scala.math.Numeric.Implicits._ 4 | import scala.reflect.ClassTag 5 | import math.Ordering.Implicits.infixOrderingOps 6 | import Tensor._ 7 | 8 | sealed trait Tensor[T]: 9 | 10 | def shape: List[Int] 11 | 12 | def length: Int = shape.headOption.getOrElse(0) 13 | 14 | def shape(axis: Int): List[Int] = shape.drop(axis) 15 | 16 | def meta[T: ClassTag] = 17 | s"shape: ${shape.mkString("x")}, ${getClass.getSimpleName}[${summon[ClassTag[T]]}]" 18 | 19 | object Tensor: 20 | def printArray(a: Array[_], meta: String): String = 21 | def loop(a: Array[_], level: Int = 1): Array[String] = 22 | a.map { e => 23 | e match 24 | case ar: Array[_] => 25 | val start = s"\n${" " * level}[" 26 | val body = loop(ar, level + 1).mkString(",") 27 | val end = if body.last == ']' then s"\n${" " * level}]" else "]" 28 | s"$start$body$end" 29 | case _ => s"$e" 30 | } 31 | val str = loop(a).mkString(", ") 32 | s"$meta:\n[" + str + (if str.last == ']' then "\n" else "") + "]" 33 | 34 | def of[T:ClassTag](size: Int, size2: Int): Tensor2D[T] = 35 | Tensor2D[T](Array.fill(size)(of[T](size2).data)) 36 | 37 | def of[T: ClassTag](size: Int): Tensor1D[T] = 38 | Tensor1D[T](Array.ofDim[T](size)) 39 | 40 | case class Tensor0D[T: ClassTag](data: T) extends Tensor[T]: 41 | override val length: Int = 1 42 | 43 | override val shape: List[Int] = length :: Nil 44 | 45 | override def toString: String = 46 | s"$meta:\n" + data + "\n" 47 | 48 | case class Tensor1D[T: ClassTag](data: Array[T]) extends Tensor[T]: 49 | override def shape: List[Int] = List(data.length) 50 | 51 | override def toString: String = 52 | printArray(data, meta) 53 | 54 | override def length: Int = data.length 55 | 56 | object Tensor1D: 57 | def apply[T: ClassTag](data: T*): Tensor1D[T] = 58 | Tensor1D[T](data.toArray) 59 | 60 | case class Tensor2D[T: ClassTag](data: Array[Array[T]]) extends Tensor[T]: 61 | override def shape: List[Int] = 62 | shape2D.toList 63 | 64 | def shape2D: (Int, Int) = 65 | (data.length, data.headOption.map(_.length).getOrElse(0)) 66 | 67 | private val meta = 68 | s"shape: ${shape.mkString("x")}, Tensor2D[${summon[ClassTag[T]]}]" 69 | 70 | override def toString: String = 71 | printArray(data, meta) 72 | 73 | override def length: Int = data.length 74 | 75 | override def shape(axis: Int) = 76 | shape.drop(axis) 77 | 78 | object Tensor2D: 79 | def apply[T: ClassTag](rows: Array[T]*): Tensor2D[T] = 80 | Tensor2D[T](rows.toArray) 81 | 82 | case class Tensor3D[T: ClassTag](data: Array[Array[Array[T]]]) extends Tensor[T]: 83 | def shape3D: (Int, Int, Int) = 84 | val rows = data.headOption.map(_.length).getOrElse(0) 85 | val cols = data.headOption.flatMap(_.headOption.map(_.length)).getOrElse(0) 86 | (data.length, rows, cols) 87 | 88 | override def shape: List[Int] = 89 | shape3D.toList 90 | 91 | override def length: Int = data.length 92 | 93 | override def toString: String = 94 | printArray(data, meta) 95 | 96 | object Tensor3D: 97 | def 
apply[T: ClassTag](matrices: Tensor2D[T]*): Tensor3D[T] = 98 | Tensor3D(matrices.toArray.map(_.data)) 99 | 100 | case class Tensor4D[T: ClassTag](data: Array[Array[Array[Array[T]]]]) extends Tensor[T]: 101 | def shape4D: (Int, Int, Int, Int) = 102 | val cubes = data.headOption.map(_.length).getOrElse(0) 103 | val rows = data.headOption.flatMap(_.headOption.map(_.length)).getOrElse(0) 104 | val cols = for { 105 | cube <- data.headOption 106 | row <- cube.headOption 107 | col <- row.headOption 108 | } yield col.length 109 | 110 | (data.length, cubes, rows, cols.getOrElse(0)) 111 | 112 | override def shape: List[Int] = 113 | shape4D.toList 114 | 115 | override def length: Int = data.length 116 | 117 | override def toString: String = 118 | printArray(data, meta) 119 | 120 | object Tensor4D: 121 | def apply[T: ClassTag](cubes: Tensor3D[T]*): Tensor4D[T] = 122 | Tensor4D(cubes.toArray.map(t => t.data)) -------------------------------------------------------------------------------- /src/main/scala/examples/MnistLoader.scala: -------------------------------------------------------------------------------- 1 | package examples 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | 6 | import scala.collection.mutable.ArrayBuffer 7 | import scala.reflect.ClassTag 8 | import scala.util.Using 9 | 10 | import java.io.{DataInputStream, BufferedInputStream, FileInputStream} 11 | import java.nio.file.{Files, Path} 12 | import java.util.zip.GZIPInputStream 13 | 14 | // data to be taken from http://yann.lecun.com/exdb/mnist/ or at GitHub somewhere 15 | object MnistLoader: 16 | val trainImagesFilename = "train-images-idx3-ubyte.gz" 17 | val trainLabelsFilename = "train-labels-idx1-ubyte.gz" 18 | val testImagesFilename = "t10k-images-idx3-ubyte.gz" 19 | val testLabelsFilename = "t10k-labels-idx1-ubyte.gz" 20 | 21 | val LabelFileMagicNumber = 2049 22 | val ImageFileMagicNumber = 2051 23 | 24 | case class MnistDataset[T: Numeric]( 25 | trainImage: Tensor[T], 26 | trainLabels: Tensor[T], 27 | testImages: Tensor[T], 28 | testLabels: Tensor[T] 29 | ) 30 | 31 | case class LoaderCfg(samples: Int, numberOfImages: Int, nRows: Int, nCols: Int) 32 | 33 | def loadData[T: Numeric: ClassTag]( 34 | mnistDir: String, 35 | samples: Int = 60_000, 36 | flat: Boolean = true 37 | ): MnistDataset[T] = 38 | val (trainImages, trainLabels) = loadDataset( 39 | Path.of(mnistDir, trainImagesFilename), 40 | Path.of(mnistDir, trainLabelsFilename), 41 | samples, 42 | flat 43 | ) 44 | val (testImages, testLabels) = loadDataset( 45 | Path.of(mnistDir, testImagesFilename), 46 | Path.of(mnistDir, testLabelsFilename), 47 | samples, 48 | flat 49 | ) 50 | MnistDataset(trainImages, trainLabels, testImages, testLabels) 51 | 52 | private def loadDataset[T: ClassTag]( 53 | images: Path, 54 | labels: Path, 55 | samples: Int, 56 | flat: Boolean 57 | )(using n: Numeric[T]): (Tensor[T], Tensor[T]) = 58 | Using.resource( 59 | new DataInputStream( 60 | new GZIPInputStream(Files.newInputStream(images)) 61 | ) 62 | ) { imageInputStream => 63 | val magicNumber = imageInputStream.readInt() 64 | assert( 65 | magicNumber == ImageFileMagicNumber, 66 | s"Image file magic number is incorrect, expected $ImageFileMagicNumber, but was $magicNumber" 67 | ) 68 | 69 | val numberOfImages = imageInputStream.readInt() 70 | val (nRows, nCols) = 71 | (imageInputStream.readInt(), imageInputStream.readInt()) 72 | 73 | val labelsTensor = Using.resource( 74 | new DataInputStream( 75 | new GZIPInputStream(Files.newInputStream(labels)) 76 | ) 77 | ) { labelInputStream => 
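// Label file layout, as consumed below (standard MNIST IDX format): a 4-byte magic number
// (expected 2049), a 4-byte label count, then one byte per label holding the digit 0-9.
// The image file read above follows the same scheme with magic number 2051 plus the row
// and column counts before the raw pixel bytes.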
78 | val labelMagicNumber = labelInputStream.readInt() 79 | assert( 80 | labelMagicNumber == LabelFileMagicNumber, 81 | s"Label file magic number is incorrect, expected $LabelFileMagicNumber, but was $labelMagicNumber" 82 | ) 83 | 84 | val numberOfLabels = labelInputStream.readInt() 85 | 86 | assert( 87 | numberOfImages == numberOfLabels, 88 | s"Number of images is not equal to number of labels, $numberOfImages != $numberOfLabels" 89 | ) 90 | 91 | labelInputStream.readAllBytes 92 | .map(l => n.fromInt(l)) 93 | .take(samples) 94 | .as1D 95 | } 96 | 97 | val cfg = LoaderCfg(samples, numberOfImages, nRows, nCols) 98 | val images = 99 | if flat then readAsVector(cfg, imageInputStream) 100 | else readAsMatrix(cfg, imageInputStream) 101 | 102 | (images, labelsTensor) 103 | } 104 | 105 | private def readAsVector[T: ClassTag](cfg: LoaderCfg, imageInputStream: DataInputStream)(using n: Numeric[T]) = 106 | val images = ArrayBuffer.empty[Array[T]] 107 | val singleImageSize = cfg.nRows * cfg.nCols 108 | 109 | for _ <- (0 until cfg.numberOfImages) do 110 | images += readNBytes(singleImageSize, imageInputStream) 111 | 112 | images.toArray.take(cfg.samples).as2D 113 | 114 | private def readAsMatrix[T: ClassTag](cfg: LoaderCfg, imageInputStream: DataInputStream)(using n: Numeric[T]) = 115 | val images = ArrayBuffer.empty[Array[Array[Array[T]]]] 116 | 117 | for _ <- (0 until cfg.numberOfImages) do 118 | val image = ArrayBuffer.empty[Array[T]] 119 | for _ <- (0 until cfg.nRows) do 120 | image += readNBytes(cfg.nCols, imageInputStream) 121 | images += Array(image.toArray) 122 | 123 | images.toArray.take(cfg.samples).as4D 124 | 125 | private def readNBytes[T: ClassTag](count: Int, is: DataInputStream)(using n: Numeric[T]) = 126 | (0 until count).map(_ => n.fromInt(is.readUnsignedByte())).toArray -------------------------------------------------------------------------------- /plots.sc: -------------------------------------------------------------------------------- 1 | // scala 2.13.4 2 | 3 | import $ivy. `org.carbonateresearch::picta:0.1.1` 4 | import org.carbonateresearch.picta.render.Html.initNotebook // required to initialize jupyter notebook mode 5 | initNotebook() // stops standard output 6 | 7 | import org.carbonateresearch.picta.IO._ 8 | import org.carbonateresearch.picta._ 9 | 10 | val metricsDir = getWorkingDirectory + "/../metrics" 11 | val filepath = metricsDir + "/lr.csv" 12 | val data = readCSV(filepath) 13 | val epochs = data("epoch").map(_.toInt) 14 | val losses = data("loss").map(_.toDouble) 15 | 16 | val series = XY(epochs, losses).asType(SCATTER).drawStyle(LINES) 17 | val chart = Chart().addSeries(series.setName("Learning loss")).setTitle("Linear Regression Example: Loss vs. Epoch") 18 | chart.plotInline 19 | 20 | val filepath = s"$metricsDir/datapoints.csv" 21 | val data = readCSV(filepath) 22 | val x = data("x").map(_.toDouble) 23 | val y = data("y").map(_.toDouble) 24 | val w = 0.6911375732835148 25 | val b = 0.7800122918798839 26 | def model(x: Double) = w * x + b 27 | val m1 = Array(-0.1d, 1.3d) 28 | val m2 = List(model(m1(0)), model(m1(1))) 29 | 30 | //val marker = Marker() setSymbol SQUARE_OPEN setColor "red" 31 | val inputData = XY(x, y) asType SCATTER setName "Input Data" drawStyle MARKERS //setMarker marker 32 | val modelData = XY(m1.toList, m2) asType SCATTER setName "Model" // drawStyle MARKERS 33 | val chart = Chart() addSeries(inputData, modelData) setTitle("Data points vs. 
Trained model") 34 | 35 | chart.plotInline 36 | 37 | val filepath = metricsDir + "/ann.csv" 38 | val data = readCSV(filepath) 39 | val epochs = data("epoch").map(_.toInt) 40 | val losses = data("loss").map(_.toDouble) 41 | val accuracy = data("accuracy").map(_.toDouble) 42 | val maxAccuracy = accuracy.max 43 | val normAccuracy = accuracy.map(_ / maxAccuracy) 44 | val maxLoss = losses.max 45 | val normLoss = losses.map(_ / maxLoss) 46 | 47 | val loss = XY(epochs, losses) asType SCATTER drawStyle LINES 48 | val acc = XY(epochs, accuracy) asType SCATTER drawStyle LINES 49 | val lossChart = 50 | Chart() addSeries( 51 | loss.setName("Learning loss"), 52 | acc.setName("Training Accuracy") 53 | ) setTitle "ANN Example: Loss vs. Accuracy vs. Epoch" 54 | lossChart.plotInline 55 | 56 | val data = readCSV(s"$metricsDir/adam-lr-surface.csv") 57 | val w = data("w").map(_.toDouble).reverse 58 | val b = data("b").map(_.toDouble).reverse 59 | val loss = data("l").map(_.split(",").map(_.toDouble)).reverse 60 | val surface = XYZ(x=w, y=b, z=loss.flatten, n=loss(0).length).asType(SURFACE).setColorBar("Loss", RIGHT_SIDE) 61 | 62 | val gradientData = readCSV(s"$metricsDir/adam-gradient.csv") 63 | val gw = gradientData("w").map(_.toDouble).reverse 64 | val gb = gradientData("b").map(_.toDouble).reverse 65 | val gLoss = gradientData("loss").map(_.toDouble).reverse 66 | val gradient = XYZ(x=gw, y=gb, z=gLoss).asType(SCATTER3D).setName("Gradient").drawLinesMarkers 67 | 68 | val surfaceChart = Chart() 69 | .addSeries(gradient,surface) 70 | .setTitle("Loss Function Surface") 71 | .setLegend(x = 0.5, y = -0.5, orientation = HORIZONTAL, xanchor = AUTO, yanchor = AUTO) 72 | .addAxes(Axis(X, title = "w"), Axis(Y, title = "b"), Axis(Z, title = "loss")) 73 | surfaceChart.plotInline 74 | 75 | import org.carbonateresearch.picta.options.Marker 76 | import org.carbonateresearch.picta.SymbolShape._ 77 | import org.carbonateresearch.picta.options.AUTO 78 | 79 | val contour = XYZ(x=w, y=b, z=loss.flatten, n=loss(0).length).asType(CONTOUR) 80 | val adamdMarker = Marker().setColor("rgb(200,0,0)").setSymbol(SQUARE_OPEN) 81 | val adamGradient = XY(x=gw, y=gb).asType(SCATTER).setName("Adam Gradient").setMarker(adamdMarker) 82 | .drawLinesMarkers 83 | 84 | val simpledGradientData = readCSV(s"$metricsDir/simplegd-gradient.csv") 85 | val simpleGw = simpledGradientData("w").map(_.toDouble).reverse 86 | val simpleGb = simpledGradientData("b").map(_.toDouble).reverse 87 | val simpleGdmarker = Marker().setColor("rgb(0,200,0)").setSymbol(SQUARE_OPEN) 88 | val simpleGDGradient = XY(x=simpleGw, y=simpleGb).asType(SCATTER) 89 | .setName("Classic Gradient Descent").setMarker(simpleGdmarker).drawLinesMarkers 90 | 91 | val simpleGDAnimation = 92 | (0 to simpleGw.length-1) 93 | .map(x => XY(simpleGw.take(x+1), simpleGb.take(x+1)) setName "Classic Gradient Descent") 94 | .toList 95 | 96 | val adamAnimation = 97 | (0 to gw.length-1) 98 | .map(x => XY(gw.take(x+1), gb.take(x+1)) setName "Adam") 99 | .toList 100 | 101 | val animatedChart = 102 | Chart(animated = true, transition_duration=simpleGw.length, animate_multiple_series = true) 103 | //.addSeries(contour) 104 | .addSeries(simpleGDAnimation) 105 | .addSeries(adamAnimation) 106 | .setTitle("Gradient Trace") 107 | .setLegend(x = 0.5, y = -0.5, orientation = HORIZONTAL, xanchor = AUTO, yanchor = AUTO) 108 | .addAxes(Axis(X, title = "w"), Axis(Y, title = "b"), Axis(Z, title = "loss")) 109 | 110 | animatedChart.plotInline 111 | 112 | val countourChart = Chart() 113 | .addSeries(contour, adamGradient, 
simpleGDGradient) 114 | .setTitle("Loss Contour") 115 | .setLegend(x = 0.5, y = -0.5, orientation = HORIZONTAL, xanchor = AUTO, yanchor = AUTO) 116 | .addAxes(Axis(X, title = "w"), Axis(Y, title = "b"), Axis(Z, title = "loss")) 117 | 118 | countourChart.plotInline 119 | -------------------------------------------------------------------------------- /src/main/scala/ml/network/optimizers.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | import ml.transformation.castFromTo 6 | 7 | import scala.collection.mutable.ListBuffer 8 | import scala.reflect.ClassTag 9 | import scala.math.Numeric.Implicits._ 10 | 11 | // suported Optimizers 12 | type Adam 13 | type StandardGD 14 | type Stub 15 | 16 | trait Optimizer[U]: 17 | 18 | def updateWeights[T: ClassTag]( 19 | layers: List[Layer[T]], 20 | activations: List[Activation[T]], 21 | error: Tensor[T], 22 | cfg: OptimizerCfg[T], 23 | timestep: Int 24 | )(using n: Fractional[T]): List[Layer[T]] 25 | 26 | def init[T: ClassTag: Numeric](w: Tensor[T], b: Tensor[T]): Option[OptimizerParams[T]] = None 27 | 28 | object optimizers: 29 | given Optimizer[Stub] with 30 | override def updateWeights[T: ClassTag]( 31 | layers: List[Layer[T]], 32 | activations: List[Activation[T]], 33 | error: Tensor[T], 34 | c: OptimizerCfg[T], 35 | timestep: Int 36 | )(using n: Fractional[T]): List[Layer[T]] = layers 37 | 38 | given Optimizer[Adam] with 39 | 40 | override def init[T: ClassTag: Numeric](w: Tensor[T], b: Tensor[T]): Option[OptimizerParams[T]] = 41 | Some(AdamState[T](w.zero, w.zero, b.zero, b.zero)) 42 | 43 | override def updateWeights[T: ClassTag]( 44 | layers: List[Layer[T]], 45 | activations: List[Activation[T]], 46 | error: Tensor[T], 47 | c: OptimizerCfg[T], 48 | timestep: Int 49 | )(using n: Fractional[T]): List[Layer[T]] = 50 | val AdamCfg(b1, b2, eps) = c.adam 51 | 52 | def correction(gradient: Tensor[T], m: Tensor[T], v: Tensor[T]) = 53 | val mt = (b1 * m) + ((n.one - b1) * gradient) 54 | val vt = (b2 * v) + ((n.one - b2) * gradient.sqr) 55 | val mHat = mt :/ (n.one - (b1 ** timestep)) 56 | val vHat = vt :/ (n.one - (b2 ** timestep)) 57 | 58 | val corr = c.learningRate * (mHat / (vHat.sqrt + eps)) 59 | (corr, mt, vt) 60 | 61 | layers 62 | .zip(activations) 63 | .foldRight( 64 | ListBuffer.empty[Layer[T]], 65 | error, 66 | None: Option[Tensor[T]] 67 | ) { 68 | case ( 69 | (layer, a), 70 | (ls, prevDelta, prevWeight) 71 | ) => 72 | val Gradient(delta, wOpt, bOpt) = layer.backward(a, prevDelta, prevWeight) 73 | val (updated, weight) = (layer, wOpt, bOpt) match 74 | case (o: Optimizable[T], Some(w), Some(b)) => 75 | // Adam 76 | o.optimizerParams match 77 | case Some(AdamState(mw, vw, mb, vb)) => 78 | val wGradient = c.clip(w) 79 | val bGradient = c.clip(b).sumRows 80 | val batchSize = n.fromInt(a.x.length) 81 | val (corrW, weightM, weightV) = correction(wGradient :/ batchSize, mw, vw) 82 | val (corrB, biasM, biasV) = correction(bGradient :/ batchSize, mb, vb) 83 | val adamState = Some(AdamState(weightM, weightV, biasM, biasV)) 84 | (o.update(corrW, corrB, adamState), o.w) 85 | case _ => 86 | (layer, None) // does nothing if Adam state is not set 87 | case _ => 88 | (layer, None) // does nothing if one of the params is empty 89 | (updated +: ls, delta, weight) 90 | } 91 | ._1.toList 92 | 93 | given Optimizer[StandardGD] with 94 | 95 | override def updateWeights[T: ClassTag]( 96 | layers: List[Layer[T]], 97 | activations: List[Activation[T]], 
98 | error: Tensor[T], 99 | cfg: OptimizerCfg[T], 100 | timestep: Int 101 | )(using n: Fractional[T]): List[Layer[T]] = 102 | layers 103 | .zip(activations) 104 | .foldRight( 105 | ListBuffer.empty[Layer[T]], 106 | error, 107 | None: Option[Tensor[T]] 108 | ) { 109 | case ( 110 | (layer, a), 111 | (ls, prevDelta, prevWeight) 112 | ) => 113 | val Gradient(delta, w, b) = layer.backward(a, prevDelta, prevWeight) 114 | val (updated, weight) = (layer, w, b) match 115 | case (o: Optimizable[T], Some(w), Some(b)) => 116 | val batchSize = n.fromInt(a.x.length) 117 | val wGradient = cfg.clip(w) :/ batchSize 118 | val bGradient = cfg.clip(b).sumRows :/ batchSize 119 | val corrW = cfg.learningRate * wGradient 120 | val corrB = cfg.learningRate * bGradient 121 | (o.update(corrW, corrB), o.w) 122 | case _ => 123 | (layer, None) 124 | (updated +: ls, delta, weight) 125 | } 126 | ._1.toList 127 | 128 | case class OptimizerCfg[T: ClassTag: Fractional]( 129 | learningRate: T, 130 | clip: GradientClipping[T] = GradientClippingApi.noClipping[T], 131 | adam: AdamCfg[T] 132 | ) 133 | 134 | sealed trait OptimizerParams[T] 135 | 136 | case class AdamState[T](mw: Tensor[T], vw: Tensor[T], mb: Tensor[T], vb: Tensor[T]) extends OptimizerParams[T] 137 | 138 | case class AdamCfg[T: ClassTag](b1: T, b2: T, eps: T) 139 | 140 | object AdamCfg: 141 | 142 | def default[T: ClassTag]: AdamCfg[T] = 143 | AdamCfg[T]( 144 | castFromTo[Double, T](0.9), 145 | castFromTo[Double, T](0.999), 146 | castFromTo[Double, T](10E-8) 147 | ) 148 | 149 | trait GradientClipping[T] extends (Tensor[T] => Tensor[T]) 150 | 151 | object GradientClippingApi: 152 | def clipByValue[T: Fractional: ClassTag](value: T): GradientClipping[T] = 153 | _.clipInRange(-value, value) 154 | 155 | def clipByNorm[T: Fractional: ClassTag](value: T): GradientClipping[T] = 156 | _.clipByNorm(value) 157 | 158 | inline def noClipping[T]: GradientClipping[T] = t => t -------------------------------------------------------------------------------- /src/test/scala/ml/network/Conv2DTest.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | import optimizers.given_Optimizer_Adam as adam 7 | 8 | import scala.reflect.ClassTag 9 | import scala.math.Numeric.Implicits._ 10 | import scala.collection.mutable.ListBuffer 11 | 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | class Conv2DTest extends AnyFlatSpec with Matchers { 16 | def testActivation[T: ClassTag](using n: Numeric[T]) = new ActivationFunc[T]: 17 | override def apply(x: Tensor[T]): Tensor[T] = x.map(_ + n.one) 18 | override def derivative(x: Tensor[T]): Tensor[T] = apply(x) 19 | override val name = "test" 20 | 21 | given testInit[T: ClassTag](using n: Numeric[T]): ParamsInitializer[T, RandomUniform] with 22 | def gen: T = n.one 23 | 24 | override def weights(rows: Int, cols: Int): Tensor2D[T] = 25 | Tensor2D(Array.fill(rows)(Array.fill[T](cols)(gen))) 26 | 27 | override def biases(length: Int): Tensor1D[T] = 28 | inits.zeros(length) 29 | 30 | val image1 = Tensor3D(Array( 31 | Array( 32 | Array(1d, 2, 3, 3), 33 | Array(2d, 3, 4, 3), 34 | Array(5d, 6, 7, 3) 35 | ), 36 | Array( 37 | Array(1d, 2, 3, 1), 38 | Array(2d, 3, 4, 1), 39 | Array(5d, 6, 7, 1) 40 | ), 41 | Array( 42 | Array(1d, 2, 3, 2), 43 | Array(2d, 3, 4, 2), 44 | Array(5d, 6, 7, 2) 45 | ) 46 | )) 47 | 48 | val image2 = Tensor3D(Array( 49 | 
Array( 50 | Array(1d, 2, 3, 1), 51 | Array(2d, 3, 4, 1), 52 | Array(5d, 6, 7, 1) 53 | ), 54 | Array( 55 | Array(1d, 2, 3, 2), 56 | Array(2d, 3, 4, 2), 57 | Array(5d, 6, 7, 2) 58 | ), 59 | Array( 60 | Array(1d, 2, 3, 3), 61 | Array(2d, 3, 4, 3), 62 | Array(5d, 6, 7, 3) 63 | ) 64 | )) 65 | 66 | val images = Tensor4D(image1, image2) 67 | 68 | it should "do forward propagation" in { 69 | // given 70 | val inputShape = images.shape4D 71 | 72 | val layer = Conv2D[Double]( 73 | f = testActivation, 74 | filterCount = 3, 75 | kernel = (2, 2), 76 | strides = (1, 1) 77 | ).init(inputShape.toList, testInit, adam) 78 | 79 | // when 80 | val activation = layer(images) 81 | val (imageCount, inputChannels, width, height) = inputShape 82 | 83 | // then 84 | activation.z.shape should ===(List(imageCount, layer.filterCount, 2, 3)) 85 | 86 | val w = layer.w.getOrElse(fail("Weight must not be empty")) 87 | val b = layer.b.getOrElse(fail("Bias must not be empty")) 88 | 89 | def applyFilter[T: ClassTag: Numeric](filter: Array[Array[T]], window: Array[Array[T]]): T = 90 | filter.zip(window).map((a, b) => a.zip(b).map(_ * _).sum).sum 91 | 92 | def filterChannel[T: Numeric: ClassTag](channel: Array[Array[T]], filter: Array[Array[T]]) = 93 | val rows = ListBuffer[Array[T]]() 94 | 95 | for i <- 0 to width - layer.kernel._1 by layer.strides._1 do 96 | val img = channel.drop(i).take(layer.kernel._1) 97 | val row = ListBuffer.empty[T] 98 | 99 | for j <- 0 to height - layer.kernel._1 by layer.strides._2 do 100 | val window = img.map(_.drop(j).take(layer.kernel._2)) 101 | row += applyFilter(filter, window) 102 | 103 | rows += row.toArray 104 | rows.toArray 105 | 106 | def filterChannels[T: ClassTag : Numeric](filters: Tensor4D[T], images: Tensor4D[T]) = 107 | images.data.map { image => 108 | filters.data.map { channels => 109 | channels.zip(image).map { (fc, ic) => 110 | filterChannel(ic, fc).as2D 111 | }.reduce(_ + _) 112 | } 113 | } 114 | 115 | val expectedActivities = filterChannels(w.as4D, images).as4D 116 | 117 | val layerActivity = activation.z.as4D.data 118 | layerActivity.zip(expectedActivities.data).foreach { (actual, expected) => 119 | actual should ===(expected) 120 | } 121 | 122 | val expectedActivation = layer.f(expectedActivities) 123 | activation.a.as4D.data sameElements expectedActivation.as4D.data 124 | } 125 | 126 | it should "do backward propagation from max pooling layer" in { 127 | // given 128 | val inputShape = images.shape4D 129 | val convLayer = Conv2D[Double]( 130 | f = testActivation, 131 | filterCount = 3, 132 | kernel = (2, 2), 133 | strides = (1, 1) 134 | ).init(inputShape.toList, testInit, adam) 135 | 136 | // when 137 | val a = convLayer(images) 138 | val poolingLayer = MaxPool[Double](padding = false).init(convLayer.shape) 139 | val pooled = poolingLayer(a.a) 140 | 141 | val maxPoolDelta = Array.fill(images.length)( 142 | Array( 143 | Array( 144 | Array(1d, 2) 145 | ), 146 | Array( 147 | Array(7d, 1) 148 | ), 149 | Array( 150 | Array(4d, 8) 151 | ) 152 | ) 153 | ) 154 | val Gradient(convDelta, _, _) = poolingLayer.backward(pooled, maxPoolDelta.as4D, None) 155 | val Gradient(delta, Some(wGrad), Some(bGrad)) = convLayer.backward(a, convDelta, None) 156 | val Some(weightsShape) = convLayer.w.map(_.shape) 157 | 158 | // then 159 | weightsShape should ===(wGrad.shape) 160 | val expectedConvGrad = Tensor4D(Array( 161 | Array.fill(3)(Array( 162 | Array(6d, 8), 163 | Array(12d,14) 164 | )), 165 | Array.fill(3)(Array( 166 | Array(3.0,4.0), 167 | Array(6.0,7.0) 168 | )), 169 | Array.fill(3)(Array( 
170 | Array(24.0,32.0), 171 | Array(48.0,56.0) 172 | )) 173 | )).map(_ * images.length).as4D.data 174 | 175 | wGrad.as4D.data should ===(expectedConvGrad) 176 | val expectedDelta = Array.fill(2)( 177 | Array.fill(3)( 178 | Array( 179 | Array(0.0, 0.0, 0.0, 0.0), 180 | Array(0.0, 11.0, 11.0, 0.0), 181 | Array(0.0, 11.0, 11.0, 0.0) 182 | ) 183 | )) 184 | delta.as4D.data should===(expectedDelta) 185 | 186 | bGrad.shape should ===(List(3)) 187 | val expectedBias = Array(2d,1,8).map(_ * images.length) 188 | bGrad.as1D.data should ===(expectedBias) 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/main/scala/ml/network/ann.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.transformation.castFromTo 4 | import ml.tensors.api._ 5 | import ml.tensors.ops._ 6 | 7 | import Model._ 8 | import Sequential._ 9 | 10 | import scala.collection.mutable.ListBuffer 11 | import scala.reflect.ClassTag 12 | import scala.util.Random 13 | 14 | /* 15 | * z - before activation = w * x 16 | * a - activation value 17 | */ 18 | case class Activation[T](x: Tensor[T], z: Tensor[T], a: Tensor[T]) 19 | 20 | sealed trait Model[T]: 21 | def reset(): Model[T] 22 | def train(x: Tensor[T], y: Tensor[T], epochs: Int, shuffle: Boolean = true): Model[T] 23 | def layers: List[Layer[T]] 24 | def predict(x: Tensor[T], customLayers: List[Layer[T]] = layers): Tensor[T] 25 | def apply(x: Tensor[T], customLayers: List[Layer[T]] = layers): Tensor[T] = 26 | predict(x, customLayers) 27 | def history: TrainHistory[T] 28 | def metricValues: MetricValues[T] 29 | 30 | object Model: 31 | def getAvgLoss[T: ClassTag](losses: List[T])(using n: Fractional[T]): T = 32 | castFromTo[Double, T](n.toDouble(losses.sum) / losses.length) 33 | 34 | object Sequential: 35 | def activate[T: Numeric: ClassTag]( 36 | input: Tensor[T], 37 | layers: List[Layer[T]] 38 | ): List[Activation[T]] = 39 | layers 40 | .foldLeft(input, ListBuffer.empty[Activation[T]]) { 41 | case ((x, acc), layer) => 42 | val act = layer(x) 43 | (act.a, acc :+ act) 44 | } 45 | ._2 46 | .toList 47 | 48 | case class TrainHistory[T](layers: List[List[Layer[T]]] = Nil, losses: List[T] = Nil) 49 | 50 | type MetricValues[T] = List[(Metric[T], List[Double])] 51 | 52 | case class Sequential[T: ClassTag: Fractional, U, V]( 53 | lossFunc: Loss[T], 54 | learningRate: T, 55 | metrics: List[Metric[T]] = Nil, 56 | batchSize: Int = 16, 57 | layerStack: List[Int] => List[Layer[T]] = _ => List.empty[Layer[T]], 58 | layers: List[Layer[T]] = Nil, 59 | history: TrainHistory[T] = TrainHistory[T](), 60 | metricValues: MetricValues[T] = Nil, 61 | gradientClipping: GradientClipping[T] = GradientClippingApi.noClipping[T], 62 | cfg: Option[OptimizerCfg[T]] = None, 63 | printStepTps: Boolean = false 64 | )(using optimizer: Optimizer[U], initializer: ParamsInitializer[T, V]) extends Model[T]: 65 | 66 | private val optimizerCfg = 67 | cfg.getOrElse(OptimizerCfg(learningRate = learningRate, gradientClipping, AdamCfg.default)) 68 | 69 | def withCfg(cfg: OptimizerCfg[T]) = 70 | copy(cfg = Some(cfg)) 71 | 72 | def predict(x: Tensor[T], inputLayers: List[Layer[T]] = layers): Tensor[T] = 73 | activate(x, inputLayers).last.a 74 | 75 | def loss(x: Tensor[T], y: Tensor[T], w: List[Layer[T]]): T = 76 | val predicted = predict(x, w) 77 | lossFunc(y, predicted) 78 | 79 | def add(layer: Layer[T]): Sequential[T, U, V] = 80 | copy(layerStack = inputShape => 81 | val currentLayers = 
layerStack(inputShape) 82 | val prevShape = currentLayers.lastOption.map(_.shape).getOrElse(inputShape) 83 | val initialized = layer match 84 | case o: Optimizable[_] => o.init(prevShape, initializer, optimizer) 85 | case _ => layer.init(prevShape) 86 | (currentLayers :+ initialized) 87 | ) 88 | 89 | private def trainEpoch( 90 | batches: Array[(Tensor[T], Tensor[T])], 91 | layers: List[Layer[T]], 92 | epoch: Int 93 | ) = 94 | val (trained, losses, metricValue, _) = 95 | batches.zipWithIndex.foldLeft( 96 | layers, 97 | ListBuffer.empty[T], 98 | ListBuffer.fill(metrics.length)(0), 99 | 0L) { 100 | case ((layers, batchLoss, epochMetrics, stepDuration), ((x, y), i)) => 101 | // forward 102 | val start = System.currentTimeMillis() 103 | val activations = activate(x, layers) 104 | val predicted = activations.last.a 105 | val error = predicted - y 106 | val loss = lossFunc(y, predicted) 107 | 108 | // backward 109 | val updated = optimizer.updateWeights( 110 | layers, 111 | activations, 112 | error, 113 | optimizerCfg, 114 | (i + 1) * epoch 115 | ) 116 | 117 | // update metrics 118 | val matches = metrics 119 | .map(_.matches(y, predicted)) 120 | .zip(epochMetrics).map(_ + _) 121 | val duration = stepDuration + (System.currentTimeMillis() - start) 122 | printEpochPerformance(i + 1, duration) 123 | 124 | (updated, batchLoss :+ loss, matches.to(ListBuffer), duration) 125 | } 126 | (trained, getAvgLoss(losses.toList), metricValue) 127 | 128 | inline private def printEpochPerformance(step: Int, duration: Long) = 129 | if printStepTps && step % 50 == 0 then 130 | println(s"${step.toDouble / (duration / 1000d)} steps/sec") 131 | 132 | def train(x: Tensor[T], y: Tensor[T], epochs: Int, shuffle: Boolean = true): Model[T] = 133 | lazy val actualBatches = y.batches(batchSize).toArray 134 | lazy val batches = x.batches(batchSize).zip(actualBatches).toArray 135 | def getBatches = if shuffle then Random.shuffle(batches).toArray else batches 136 | val currentLayers = getOrInitLayers(x.shape) 137 | val initialMetrics = metrics.map(_ -> List.empty[Double]) 138 | println(s"Running $epochs epochs") 139 | 140 | val (updatedLayers, lHistory, epochLosses, metricValues) = 141 | (1 to epochs).foldLeft(currentLayers, ListBuffer.empty[List[Layer[T]]], ListBuffer.empty[T], initialMetrics) { 142 | case ((layers, lHistory, losses, trainingMetrics), epoch) => 143 | val start = System.currentTimeMillis() 144 | val (trainedLayers, avgLoss, epochMatches) = trainEpoch(getBatches, layers, epoch) 145 | val duration = System.currentTimeMillis() - start 146 | 147 | val (epochMetrics, epochMetricAvg) = updateMetrics(epochMatches.toList, trainingMetrics, x.length) 148 | printMetrics(epoch, epochs, avgLoss, epochMetricAvg, duration) 149 | 150 | (trainedLayers, lHistory :+ trainedLayers, losses :+ avgLoss, epochMetrics) 151 | } 152 | 153 | copy( 154 | layers = updatedLayers, 155 | history = history.copy(losses = epochLosses.toList, layers = lHistory.toList), 156 | metricValues = metricValues 157 | ) 158 | 159 | private def updateMetrics( 160 | observedMatches: List[Int], 161 | currentMetrics: MetricValues[T], 162 | samples: Int 163 | ) = 164 | val observedAvg = metrics.zip(observedMatches).map((m, matches) => m -> m.average(samples, matches)) 165 | val updatedMetrics = observedAvg.zip(currentMetrics).map { 166 | case ((_, v), (currentMetric, values)) => currentMetric -> (values :+ v) 167 | } 168 | (updatedMetrics, observedAvg) 169 | 170 | private def printMetrics(epoch: Int, epochs: Int, avgLoss: T, values: List[(Metric[T], Double)], 
duration: Long) = 171 | val metricsStat = values 172 | .map((m, avg) => s"${m.name}: $avg") 173 | .mkString(", metrics: [", ";", "]") 174 | println( 175 | s"epoch: $epoch/$epochs, duration: ${duration/1000} sec, avg. loss: $avgLoss${if metrics.nonEmpty then metricsStat else ""}" 176 | ) 177 | 178 | def reset(): Model[T] = 179 | copy(layers = Nil) 180 | 181 | private def getOrInitLayers(inputShape: List[Int]) = 182 | if layers.isEmpty then layerStack(inputShape) 183 | else layers -------------------------------------------------------------------------------- /src/main/scala/ml/network/layers.scala: -------------------------------------------------------------------------------- 1 | package ml.network 2 | 3 | import ml.tensors.api._ 4 | import ml.tensors.ops._ 5 | 6 | import scala.collection.mutable.ListBuffer 7 | import scala.reflect.ClassTag 8 | import scala.math.Numeric.Implicits._ 9 | import scala.collection.parallel.CollectionConverters._ 10 | 11 | final case class Gradient[T]( 12 | delta: Tensor[T], 13 | w: Option[Tensor[T]] = None, 14 | b: Option[Tensor[T]] = None 15 | ) 16 | 17 | trait Layer[T]: 18 | val f: ActivationFunc[T] = ActivationFuncApi.linear 19 | val shape: List[Int] 20 | 21 | def init[U, V](prevShape: List[Int]): Layer[T] = this 22 | def apply(x: Tensor[T]): Activation[T] 23 | def backward(a: Activation[T], prevDelta: Tensor[T], preWeight: Option[Tensor[T]]): Gradient[T] 24 | 25 | override def toString() = 26 | s"\nf = ${f.name},\nshape = $shape" 27 | 28 | trait Optimizable[T] extends Layer[T]: 29 | val w: Option[Tensor[T]] 30 | val b: Option[Tensor[T]] 31 | val optimizerParams: Option[OptimizerParams[T]] 32 | 33 | def init[U, V](prevShape: List[Int], initializer: ParamsInitializer[T, V], optimizer: Optimizer[U]): Layer[T] 34 | 35 | def update(wGradient: Tensor[T], bGradient: Tensor[T], optimizerParams: Option[OptimizerParams[T]] = None): Layer[T] 36 | 37 | override def toString() = 38 | s"(${super.toString},\nweight = $w,\nbias = $b)" 39 | 40 | case class Dense[T: ClassTag]( 41 | override val f: ActivationFunc[T] = ActivationFuncApi.linear[T], 42 | units: Int = 1, 43 | shape: List[Int] = Nil, 44 | w: Option[Tensor[T]] = None, 45 | b: Option[Tensor[T]] = None, 46 | optimizerParams: Option[OptimizerParams[T]] = None 47 | )(using n: Fractional[T]) extends Optimizable[T]: 48 | 49 | override def init[U, V](prevShape: List[Int], initializer: ParamsInitializer[T, V], optimizer: Optimizer[U]): Layer[T] = 50 | val inputs = prevShape.drop(1).reduce(_ * _) 51 | val w = initializer.weights(inputs, units) 52 | val b = initializer.biases(units) 53 | val optimizerParams = optimizer.init(w, b) 54 | copy(w = Some(w), b = Some(b), shape = List(inputs, units), optimizerParams = optimizerParams) 55 | 56 | override def apply(x: Tensor[T]): Activation[T] = 57 | val z = x * w + b 58 | val a = f(z) 59 | Activation(x, z, a) 60 | 61 | override def update(wGradient: Tensor[T], bGradient: Tensor[T], optimizerParams: Option[OptimizerParams[T]] = None): Layer[T] = 62 | val updatedW = w.map(_ - wGradient) 63 | val updatedB = b.map(_ - bGradient) 64 | copy(w = updatedW, b = updatedB, optimizerParams = optimizerParams) 65 | 66 | override def backward(a: Activation[T], prevDelta: Tensor[T], prevWeight: Option[Tensor[T]]): Gradient[T] = 67 | val delta = (prevWeight match 68 | case Some(pw) => prevDelta * pw.T 69 | case None => prevDelta 70 | ) |*| f.derivative(a.z) 71 | 72 | val wGradient = Some(a.x.T * delta) 73 | val bGradient = Some(delta) 74 | Gradient(delta, wGradient, bGradient) 75 | 76 | case class 
Conv2D[T: ClassTag]( 77 | override val f: ActivationFunc[T], 78 | filterCount: Int = 1, 79 | kernel: (Int, Int) = (2, 2), 80 | strides: (Int, Int) = (1, 1), 81 | shape: List[Int] = Nil, 82 | w: Option[Tensor[T]] = None, 83 | b: Option[Tensor[T]] = None, 84 | optimizerParams: Option[OptimizerParams[T]] = None 85 | )(using n: Fractional[T]) extends Optimizable[T]: 86 | 87 | override def init[U, V](prevShape: List[Int], initializer: ParamsInitializer[T, V], optimizer: Optimizer[U]): Conv2D[T] = 88 | val images :: channels :: height :: width :: _ = prevShape 89 | val w = initializer.weights4D(List(filterCount, channels, kernel._1, kernel._2)) 90 | val b = initializer.biases(filterCount) 91 | val optimizerParams = optimizer.init(w, b) 92 | val rows = (height - kernel._1) / strides._1 + 1 93 | val cols = (width - kernel._2) / strides._2 + 1 94 | val shape = List(images, filterCount, rows, cols) 95 | copy(w = Some(w), b = Some(b), shape = shape, optimizerParams = optimizerParams) 96 | 97 | override def apply(x: Tensor[T]): Activation[T] = 98 | val z = (w, b) match 99 | case (Some(w), Some(b)) => forward(kernel, strides, x, w, b) 100 | case _ => x // does nothing when one of the params is empty 101 | val a = f(z) 102 | Activation(x, z, a) 103 | 104 | private def forward(kernel: (Int, Int), stride: (Int, Int), x: Tensor[T], w: Tensor[T], b: Tensor[T]): Tensor[T] = 105 | val (images, filters) = (x.as4D, w.as4D) 106 | 107 | def filterImage(image: Array[Array[Array[T]]]) = 108 | filters.data.zip(b.as1D.data).map { (f, b) => 109 | val filtered = f.zip(image).map { (fc, ic) => 110 | conv(fc.as2D, ic.as2D, kernel, stride) 111 | }.reduce(_ + _) 112 | filtered + b.asT 113 | } 114 | 115 | images.data.par.map(filterImage).toArray.as4D 116 | 117 | private def conv(filterChannel: Tensor2D[T], imageChannel: Tensor2D[T], kernel: (Int, Int), stride: (Int, Int)) = 118 | val filtered = 119 | for row <- imageRegions(imageChannel, kernel, stride) yield 120 | for (region, _, _) <- row yield 121 | (region |*| filterChannel).sum 122 | 123 | filtered.as2D 124 | 125 | private def fullConv(filter: Tensor2D[T], loss: Tensor2D[T], kernel: (Int, Int), stride: (Int, Int), rows: Int, cols: Int) = 126 | val out = Array.ofDim(rows, cols) 127 | 128 | for i <- 0 until kernel._1 do 129 | for j <- 0 until kernel._2 do 130 | val delta = filter * loss.data(i)(j) 131 | val (x, y) = (i * stride._1, j * stride._2) 132 | 133 | val iter = delta.as2D.data.flatten.iterator 134 | for k <- x until x + kernel._1 do 135 | for l <- y until y + kernel._2 do 136 | out(k)(l) += iter.next 137 | 138 | out.as2D 139 | 140 | private def calcGradient(loss: Tensor2D[T], image: Tensor2D[T], kernel: (Int, Int), stride: (Int, Int)) = 141 | val grad = 142 | for (region, i, j) <- imageRegions(image, kernel, stride).flatten 143 | yield region * loss.data(i)(j) 144 | 145 | grad.reduce(_ + _).as2D 146 | 147 | private def imageRegions(image: Tensor2D[T], kernel: (Int, Int), stride: (Int, Int)) = 148 | val (rows, cols) = image.shape2D 149 | for i <- 0 to rows - kernel._1 by stride._1 yield 150 | for j <- 0 to cols - kernel._2 by stride._2 yield 151 | (image.slice((i, i + kernel._1), (j, j + kernel._2)).as2D, i, j) 152 | 153 | override def backward(a: Activation[T], prevDelta: Tensor[T], preWeight: Option[Tensor[T]]): Gradient[T] = 154 | (w, b) match 155 | case (Some(w), Some(b)) => 156 | val prevLoss = prevDelta.as4D // nImages, filters, rows, cols 157 | val x = a.x.as4D 158 | 159 | def imageGrad(imageChannels: Array[Array[Array[T]]], lossChannels: 
Array[Array[Array[T]]]) = 160 | lossChannels.map { lc => 161 | imageChannels.map { ic => 162 | calcGradient(lc.as2D, ic.as2D, kernel, strides) 163 | } 164 | } 165 | 166 | val wGradient = x.data.zip(prevLoss.data) 167 | .par.map(imageGrad) 168 | .reduce { 169 | (image1, image2) => 170 | image1.zip(image2).map { (channels1, channels2) => 171 | channels1.zip(channels2).map(_ + _) 172 | } 173 | }.as4D 174 | 175 | val bGradient = prevLoss.data 176 | .par.map(_.map(_.sum)) 177 | .reduce(_ + _) 178 | .as1D 179 | 180 | val (_, _, rows, cols) = x.shape4D 181 | val delta = prevLoss.data.par.map { lossChannels => 182 | w.as4D.data.map { channels => 183 | lossChannels.zip(channels).map { (lc, fc) => 184 | fullConv(fc.as2D, lc.as2D, kernel, strides, rows, cols) 185 | }.reduce(_ + _) 186 | } 187 | }.toArray.as4D 188 | 189 | Gradient(delta, Some(wGradient), Some(bGradient)) 190 | case _ => 191 | Gradient(prevDelta) 192 | 193 | override def update(wGradient: Tensor[T], bGradient: Tensor[T], optimizerParams: Option[OptimizerParams[T]] = None): Layer[T] = 194 | val updatedW = w.map(_ - wGradient) 195 | val updatedB = b.map(_ - bGradient) 196 | copy(w = updatedW, b = updatedB, optimizerParams = optimizerParams) 197 | 198 | case class MaxPool[T: ClassTag: Numeric]( 199 | window: (Int, Int) = (2, 2), 200 | strides: (Int, Int) = (1, 1), 201 | shape: List[Int] = Nil, 202 | shape2D: (Int, Int) = (0, 0), 203 | padding: Boolean = true 204 | ) extends Layer[T]: 205 | 206 | override def init[U, V](prevShape: List[Int]): Layer[T] = 207 | val (a :: b :: rows :: cols :: _) = prevShape 208 | val pad = if padding then 1 else 0 209 | val height = (rows - window._1 + pad) / strides._1 + 1 210 | val width = (cols - window._2 + pad) / strides._2 + 1 211 | val shape = List(a, b, height, width) 212 | copy(shape = shape, shape2D = (height, width)) 213 | 214 | def apply(x: Tensor[T]): Activation[T] = 215 | val pooled = x.as4D.data.map(_.map(c => poolMax(c.as2D))).as4D 216 | Activation(x, pooled, pooled) 217 | 218 | private def imageRegions(image: Tensor2D[T], window: (Int, Int), strides: (Int, Int)) = 219 | val (rows, cols) = shape2D 220 | for i <- 0 until rows by strides._1 yield 221 | for j <- 0 until cols by strides._2 yield 222 | (image.slice((i, i + window._1), (j, j + window._2)).as2D, i, j) 223 | 224 | private def poolMax(image: Tensor2D[T]): Tensor2D[T] = 225 | val (rows, cols) = shape2D 226 | val out = Array.ofDim(rows, cols) 227 | val pooled = 228 | for (region, i, j) <- imageRegions(image, window, strides).flatten yield 229 | out(i)(j) = region.max 230 | out.as2D 231 | 232 | private def maxIndex(matrix: Tensor2D[T]): (Int, Int) = 233 | val maxPerRow = matrix.data.zipWithIndex.map((row, i) => (row.max, i, row.indices.maxBy(row))) 234 | maxPerRow.maxBy(_._1).tail 235 | 236 | def backward(a: Activation[T], prevDelta: Tensor[T], preWeight: Option[Tensor[T]]): Gradient[T] = 237 | val images = a.x.as4D.data 238 | val delta = images.zip(prevDelta.as4D.data).par.map { (imageChannels, deltaChannels) => 239 | imageChannels.zip(deltaChannels).map { (ic, dc) => 240 | val image = ic.as2D 241 | val out = image.zero.as2D.data 242 | for (region, i, j) <- imageRegions(image, window, strides).flatten yield 243 | val (a, b) = maxIndex(region) 244 | out(i + a)(j + b) = dc(i)(j) 245 | out 246 | } 247 | } 248 | Gradient(delta.toArray.as4D) 249 | 250 | case class Flatten2D[T: ClassTag: Numeric]( 251 | shape: List[Int] = Nil, 252 | prevShape: List[Int] = Nil 253 | ) extends Layer[T]: 254 | 255 | override def init[U, V](prevShape: List[Int]): 
Layer[T] = 256 | val (head :: tail ) = prevShape 257 | val shape = List(head, tail.reduce(_ * _)) 258 | copy(shape = shape, prevShape = prevShape) 259 | 260 | def apply(x: Tensor[T]): Activation[T] = 261 | val flat = x.as2D 262 | Activation(x, flat, flat) 263 | 264 | def backward(a: Activation[T], prevDelta: Tensor[T], prevWeight: Option[Tensor[T]]): Gradient[T] = 265 | val delta = (prevWeight match 266 | case Some(pw) => prevDelta * pw.T 267 | case None => prevDelta 268 | ) //|*| f.derivative(a.z) //TODO: is any z multiply required here? 269 | 270 | val (filters :: rows :: cols :: _) = prevShape.drop(1) 271 | val unflatten = delta.reshape(List(filters, rows, cols)) 272 | Gradient(unflatten) -------------------------------------------------------------------------------- /src/main/scala/ml/tensors/ops.scala: -------------------------------------------------------------------------------- 1 | package ml.tensors 2 | 3 | import ml.tensors.api._ 4 | import ml.transformation.castFromTo 5 | 6 | import scala.reflect.ClassTag 7 | import scala.collection.mutable.ArrayBuffer 8 | import math.Numeric.Implicits.infixNumericOps 9 | import math.Ordering.Implicits.infixOrderingOps 10 | import math.Fractional.Implicits.infixFractionalOps 11 | import math.Integral.Implicits.infixIntegralOps 12 | 13 | private trait genOps: 14 | extension [T: ClassTag: Numeric](t: Tensor[T]) 15 | // dot product 16 | def *(that: Tensor[T]): Tensor[T] = TensorOps.mul(t, that) 17 | def *(that: Option[Tensor[T]]): Tensor[T] = TensorOps.optMul(t, that) 18 | def *(that: T): Tensor[T] = TensorOps.mul(t, Tensor0D(that)) 19 | def -(that: T): Tensor[T] = TensorOps.subtract(t, Tensor0D(that)) 20 | def -(that: Tensor[T]): Tensor[T] = TensorOps.subtract(t, that) 21 | def +(that: Tensor[T]): Tensor[T] = TensorOps.plus(t, that) 22 | def +(that: Option[Tensor[T]]): Tensor[T] = TensorOps.optPlus(t, that) 23 | def +(that: T): Tensor[T] = TensorOps.plus(t, Tensor0D(that)) 24 | def sum: T = TensorOps.sum(t) 25 | def split(fraction: Float): (Tensor[T], Tensor[T]) = TensorOps.split(fraction, t) 26 | 27 | // Hadamard product 28 | def multiply(that: Tensor[T]): Tensor[T] = TensorOps.multiply(t, that) 29 | def multiply(that: Option[Tensor[T]]): Tensor[T] = TensorOps.optMultiply(t, that) 30 | def |*|(that: Tensor[T]): Tensor[T] = TensorOps.multiply(t, that) 31 | def |*|(that: Option[Tensor[T]]): Tensor[T] = TensorOps.optMultiply(t, that) 32 | 33 | def batches(batchSize: Int): Iterator[Tensor[T]] = TensorOps.batches(t, batchSize) 34 | def equalRows(that: Tensor[T]): Int = TensorOps.equalRows(t, that) 35 | def clipInRange(min: T, max: T): Tensor[T] = TensorOps.clipInRange(t, min, max) 36 | def :**(to: Int): Tensor[T] = TensorOps.pow(t, to) 37 | def sqr: Tensor[T] = TensorOps.pow(t, 2) 38 | def sqrt: Tensor[T] = TensorOps.sqrt(t) 39 | def zero: Tensor[T] = TensorOps.zero(t) 40 | def argMax: Tensor[T] = TensorOps.argMax(t) 41 | def outer(that: Tensor[T]) = TensorOps.outer(t, that) 42 | def flatten: Tensor[T] = TensorOps.flatten(t) 43 | def diag: Tensor[T] = TensorOps.diag(t) 44 | def sumRows: Tensor[T] = TensorOps.sumRows(t) 45 | def sumCols: Tensor[T] = TensorOps.sumCols(t) 46 | def max: T = TensorOps.max(t) 47 | def reshape(shape: List[Int]): Tensor[T] = TensorOps.reshape(t, shape) 48 | 49 | extension [T: ClassTag: Fractional](t: Tensor[T]) 50 | def clipByNorm(norm: T): Tensor[T] = TensorOps.clipByNorm(t, norm) 51 | def /(that: Tensor[T]): Tensor[T] = TensorOps.div(t, that) 52 | def :/(that: T): Tensor[T] = TensorOps.div(t, Tensor0D(that)) 53 | 54 | 
extension [T: ClassTag](t: Tensor[T]) 55 | def T: Tensor[T] = TensorOps.transpose(t) 56 | def map[U: ClassTag](f: T => U): Tensor[U] = TensorOps.map[T, U](t, f) 57 | def mapRow[U: ClassTag](f: Array[T] => Array[U]): Tensor[U] = TensorOps.mapRow[T, U](t, f) 58 | 59 | object ops extends genOps: 60 | extension [T: ClassTag](t: Tensor2D[T]) 61 | def col(i: Int): Tensor1D[T] = Tensor1D(TensorOps.col(t.data, i)) 62 | def T: Tensor2D[T] = TensorOps.transpose(t).asInstanceOf[Tensor2D[T]] 63 | def slice( 64 | rows: Option[(Int, Int)], 65 | cols: Option[(Int, Int)] 66 | ): Tensor2D[T] = 67 | Tensor2D(t.data.slice(rows, cols)) 68 | def slice( 69 | rows: (Int, Int), 70 | cols: (Int, Int) 71 | ): Tensor2D[T] = 72 | Tensor2D(TensorOps.sliceArr(t.data, rows, cols)) 73 | 74 | extension [T: ClassTag: Numeric](t: Tensor2D[T]) 75 | def |*|(that: Tensor2D[T]): Tensor2D[T] = TensorOps.multiply(t, that).asInstanceOf[Tensor2D[T]] 76 | def +(that: Tensor[T]): Tensor2D[T] = TensorOps.plus(t, that).asInstanceOf[Tensor2D[T]] 77 | 78 | extension [T: ClassTag](t: Tensor[T]) 79 | def as0D: Tensor0D[T] = TensorOps.as0D(t) 80 | def as1D: Tensor1D[T] = TensorOps.as1D(t) 81 | def as2D: Tensor2D[T] = TensorOps.as2D(t) 82 | def as3D: Tensor3D[T] = TensorOps.as3D(t) 83 | def as4D: Tensor4D[T] = TensorOps.as4D(t) 84 | 85 | extension [T: ClassTag](t: T) 86 | def asT: Tensor[T] = Tensor0D(t) 87 | def as0D: Tensor0D[T] = Tensor0D(t) 88 | def as1D: Tensor1D[T] = Tensor1D(Array(t)) 89 | def as2D: Tensor2D[T] = Tensor2D(Array(Array(t))) 90 | 91 | extension [T: ClassTag](t: T)(using n: Numeric[T]) 92 | def **(to: Int): T = castFromTo[Double, T](math.pow(n.toDouble(t), to)) 93 | 94 | implicit class Tensor0DOps[T: ClassTag: Numeric](val t: T): 95 | // dot product 96 | def *(that: Tensor[T]): Tensor[T] = TensorOps.mul(Tensor0D(t), that) 97 | def -(that: Tensor[T]): Tensor[T] = TensorOps.subtract(Tensor0D(t), that) 98 | def +(that: Tensor[T]): Tensor[T] = TensorOps.plus(Tensor0D(t), that) 99 | 100 | extension [T: ClassTag: Numeric](a: Array[T]) 101 | def as1D: Tensor1D[T] = Tensor1D(a) 102 | def as2D: Tensor2D[T] = Tensor2D(a) 103 | 104 | extension [T: ClassTag](a: Array[T])(using n: Numeric[T]) 105 | def +(b: Array[T]): Array[T] = a.zip(b).map(n.plus) 106 | 107 | extension [T: ClassTag: Numeric](a: Array[Array[T]]) 108 | def as2D: Tensor2D[T] = Tensor2D(a) 109 | def sum: T = a.map(_.sum).sum 110 | 111 | extension [T: ClassTag: Numeric](a: IndexedSeq[IndexedSeq[T]]) 112 | def as2D: Tensor2D[T] = Tensor2D(a.map(_.toArray).toArray) 113 | 114 | extension [T: ClassTag: Numeric](a: Array[Tensor2D[T]]) 115 | def as3D: Tensor3D[T] = Tensor3D(a:_*) 116 | 117 | extension [T: ClassTag: Numeric](a: Array[Array[Array[T]]]) 118 | def as3D: Tensor3D[T] = Tensor3D(a) 119 | 120 | extension [T: ClassTag: Numeric](a: Array[Array[Array[Array[T]]]]) 121 | def as4D: Tensor4D[T] = Tensor4D(a) 122 | 123 | extension [T: ClassTag: Numeric](a: Array[Array[Tensor2D[T]]]) 124 | def as4D: Tensor4D[T] = Tensor4D(a.map(_.map(_.data))) 125 | 126 | extension [T: ClassTag](a: Array[Array[T]]) 127 | def col(i: Int): Array[T] = TensorOps.col(a, i) 128 | def slice( 129 | rows: Option[(Int, Int)] = None, 130 | cols: Option[(Int, Int)] = None 131 | ): Array[Array[T]] = TensorOps.slice(a, rows, cols) 132 | 133 | extension [T: ClassTag: Numeric](pair: (Tensor[T], Tensor[T])) 134 | def map2[U: ClassTag: Numeric](f: (T, T) => U): Tensor[U] = 135 | TensorOps.map2(pair._1, pair._2, f) 136 | 137 | def split( 138 | fraction: Float 139 | ): ((Tensor[T], Tensor[T]), (Tensor[T], 
Tensor[T])) = 140 | TensorOps.split(fraction, pair) 141 | 142 | extension [T: ClassTag](t: Tensor1D[T]) 143 | def batchColumn(batchSize: Int): Iterator[Array[T]] = 144 | t.data.grouped(batchSize) 145 | 146 | object TensorOps: 147 | 148 | def subtract[T: ClassTag: Numeric](a: Tensor[T], b: Tensor[T]): Tensor[T] = 149 | (a, b) match 150 | case (Tensor1D(data), Tensor1D(data2)) => 151 | checkShapeEquality(a, b) 152 | Tensor1D(data.zip(data2).map(_ - _)) 153 | case (Tensor2D(data), Tensor2D(data2)) => 154 | checkShapeEquality(a, b) 155 | Tensor2D(matrixMinusMatrix(data, data2)) 156 | case (Tensor2D(data), Tensor0D(data2)) => // broadcasting 157 | Tensor2D(data.map(_.map(_ - data2))) 158 | case (Tensor0D(data), Tensor2D(data2)) => // broadcasting 159 | Tensor2D(data2.map(_.map(v => data - v))) 160 | case (Tensor1D(data), Tensor0D(data2)) => // broadcasting 161 | Tensor1D(data.map(_ - data2)) 162 | case (t1 @ Tensor2D(_), t2 @ Tensor1D(_)) => // broadcasting 163 | matrixMinusVector(t1, t2) 164 | case (Tensor4D(data), Tensor4D(data2)) => 165 | checkShapeEquality(a, b) 166 | val res = data.zip(data2).map { (cubes, cubes2) => 167 | cubes.zip(cubes2).map { (mat1, mat2) => 168 | matrixMinusMatrix(mat1, mat2) 169 | } 170 | } 171 | Tensor4D(res) 172 | case (t1, t2) => 173 | sys.error(s"Not implemented for\n$t1 and\n$t2") 174 | 175 | private def matrixMinusMatrix[T: ClassTag: Numeric](a: Array[Array[T]], b: Array[Array[T]]): Array[Array[T]] = 176 | val rows = a.length 177 | val cols = a.headOption.map(_.length).getOrElse(0) 178 | val res = Array.ofDim[T](rows, cols) 179 | 180 | for i <- a.indices do 181 | for j <- 0 until cols do 182 | res(i)(j) = a(i)(j) - b(i)(j) 183 | res 184 | 185 | private def matrixMinusVector[T: Numeric: ClassTag]( 186 | matrix: Tensor2D[T], 187 | vector: Tensor1D[T] 188 | ) = 189 | val cols = matrix.shape2D._2 190 | assert( 191 | cols == vector.length, 192 | s"trailing axis must have the same size, $cols != ${vector.length}" 193 | ) 194 | val res = matrix.data.map(_.zip(vector.data).map{(a, b) => a - b }) 195 | Tensor2D(res) 196 | 197 | private def checkShapeEquality[T](a: Tensor[T], b: Tensor[T]) = 198 | assert(a.shape == b.shape, s"Tensors must have the same shape: ${a.shape} != ${b.shape}") 199 | 200 | def optPlus[T: ClassTag: Numeric](a: Tensor[T], b: Option[Tensor[T]]): Tensor[T] = 201 | b.fold(a)(t => plus(a, t)) 202 | 203 | def plus[T: ClassTag: Numeric](a: Tensor[T], b: Tensor[T]): Tensor[T] = 204 | (a, b) match 205 | // broadcasting 206 | case (Tensor2D(data), Tensor0D(data2)) => 207 | Tensor2D(data.map(_.map(_ + data2))) 208 | case (Tensor0D(data), Tensor2D(data2)) => 209 | Tensor2D(data2.map(_.map(_ + data))) 210 | case (t1 @ Tensor2D(_), t2 @ Tensor1D(_)) => 211 | matrixPlusVector(t1, t2) 212 | case (t1 @ Tensor1D(_), t2 @ Tensor2D(_)) => 213 | matrixPlusVector(t2, t1) 214 | case (Tensor1D(data), Tensor0D(data2)) => 215 | Tensor1D(data.map(_ + data2)) 216 | case (Tensor0D(data), Tensor1D(data2)) => 217 | Tensor1D(data2.map(_ + data)) 218 | case (Tensor4D(data), Tensor0D(data2)) => 219 | Tensor4D(data.map(_.map(_.map(_.map(_ + data2))))) 220 | 221 | case (Tensor1D(data), Tensor1D(data2)) => 222 | checkShapeEquality(a, b) 223 | val res = Array.ofDim(data.length) 224 | for i <- 0 until data.length do 225 | res(i) = data(i) + data2(i) 226 | Tensor1D(res) 227 | case (t1 @ Tensor2D(data), Tensor2D(data2)) => 228 | checkShapeEquality(a, b) 229 | val res = matrixPlusMatrix(data, data2) 230 | Tensor2D(res) 231 | case (Tensor4D(data), Tensor4D(data2)) => 232 | 
checkShapeEquality(a, b) 233 | val res = data.zip(data2).map { (cubes1, cubes2) => 234 | cubes1.zip(cubes2).map { (mat1, mat2) => 235 | matrixPlusMatrix(mat1, mat2) 236 | } 237 | } 238 | Tensor4D(res) 239 | case (Tensor0D(data), Tensor0D(data2)) => 240 | Tensor0D(data + data2) 241 | case _ => notImplementedError(a :: b:: Nil) 242 | 243 | private def matrixPlusMatrix[T: ClassTag: Numeric](a: Array[Array[T]], b: Array[Array[T]]): Array[Array[T]] = 244 | val (rows, cols) = (a.length, a.head.length) 245 | val res = Array.ofDim(rows, cols) 246 | for i <- 0 until rows do 247 | for j <- 0 until cols do 248 | res(i)(j) = a(i)(j) + b(i)(j) 249 | res 250 | 251 | private def notImplementedError[T](ts: List[Tensor[T]]) = 252 | sys.error(s"Not implemented for: ${ts.mkString("\n")}") 253 | 254 | private def matrixPlusVector[T: ClassTag: Numeric]( 255 | t1: Tensor2D[T], 256 | t2: Tensor1D[T] 257 | ) = 258 | val (rows, cols) = t1.shape2D 259 | assert( 260 | cols == t2.length, 261 | s"tensors must have the same amount of cols to sum them up element-wise, but were: $cols != ${t2.length}" 262 | ) 263 | val sum = Array.ofDim[T](rows, cols) 264 | for i <- 0 until rows do 265 | for j <- 0 until cols do 266 | sum(i)(j) = t1.data(i)(j) + t2.data(j) 267 | Tensor2D(sum) 268 | 269 | def optMul[T: ClassTag: Numeric](a: Tensor[T], b: Option[Tensor[T]]): Tensor[T] = 270 | b.fold(a)(t => mul(a, t)) 271 | 272 | def mul[T: ClassTag: Numeric](a: Tensor[T], b: Tensor[T]): Tensor[T] = 273 | (a, b) match 274 | case (Tensor0D(data), t) => 275 | scalarMul(t, data) 276 | case (t, Tensor0D(data)) => 277 | scalarMul(t, data) 278 | case (Tensor1D(data), Tensor2D(data2)) => 279 | Tensor2D(matMul(Array(data), data2)) 280 | case (Tensor2D(data), Tensor1D(data2)) => 281 | Tensor2D(matMul(data, asColumn(data2))) 282 | case (Tensor1D(data), Tensor1D(data2)) => 283 | Tensor0D(matMul(Array(data), asColumn(data2)).head.head) 284 | case (Tensor2D(data), Tensor2D(data2)) => 285 | Tensor2D(matMul(data, data2)) 286 | case _ => notImplementedError(a :: b :: Nil) 287 | 288 | private def asColumn[T: ClassTag](a: Array[T]) = a.map(Array(_)) 289 | 290 | def map[T: ClassTag, U: ClassTag](t: Tensor[T], f: T => U): Tensor[U] = 291 | t match 292 | case Tensor0D(data) => Tensor0D(f(data)) 293 | case Tensor1D(data) => Tensor1D(data.map(f)) 294 | case Tensor2D(data) => Tensor2D(data.map(_.map(f))) 295 | case Tensor3D(data) => Tensor3D(data.map(_.map(_.map(f)))) 296 | case Tensor4D(data) => Tensor4D(data.map(_.map(_.map(_.map(f))))) 297 | 298 | def mapRow[T: ClassTag, U: ClassTag](t: Tensor[T], f: Array[T] => Array[U]): Tensor[U] = 299 | t match 300 | case Tensor0D(data) => Tensor0D(f(Array(data)).head) 301 | case Tensor1D(data) => Tensor1D(f(data)) 302 | case Tensor2D(data) => Tensor2D(data.map(f)) 303 | case _ => notImplementedError(t :: Nil) 304 | 305 | private def map2[T: ClassTag, U: ClassTag](a: Array[T], b: Array[T], f: (T, T) => U): Array[U] = 306 | val res = Array.ofDim[U](a.length) 307 | for i <- (0 until a.length).indices do 308 | res(i) = f(a(i), b(i)) 309 | res 310 | 311 | def map2[T: ClassTag: Numeric, U: ClassTag: Numeric](a: Tensor[T], b: Tensor[T], f: (T, T) => U): Tensor[U] = 312 | (a, b) match 313 | case (Tensor0D(data), Tensor0D(data2)) => 314 | Tensor0D(f(data, data2)) 315 | case (Tensor1D(data), Tensor1D(data2)) => 316 | Tensor1D(map2(data, data2, f)) 317 | case (Tensor2D(data), Tensor2D(data2)) => 318 | val res = Array.ofDim[U](data.length, colsCount(data2)) 319 | for i <- (0 until data.length).indices do 320 | res(i) = map2(data(i), 
data2(i), f) 321 | Tensor2D(res) 322 | case _ => 323 | sys.error(s"Both tensors must have the same dimension: ${a.shape} != ${b.shape}") 324 | 325 | private def colsCount[T](a: Array[Array[T]]): Int = 326 | a.headOption.map(_.length).getOrElse(0) 327 | 328 | private def scalarMul[T: ClassTag: Numeric]( 329 | t: Tensor[T], 330 | scalar: T 331 | ): Tensor[T] = 332 | t match 333 | case Tensor0D(data) => Tensor0D(data * scalar) 334 | case Tensor1D(data) => Tensor1D(data.map(_ * scalar)) 335 | case Tensor2D(data) => Tensor2D(data.map(_.map(_ * scalar))) 336 | case Tensor4D(data) => Tensor4D(data.map(_.map(_.map(_.map(_ * scalar))))) 337 | case _ => notImplementedError(t :: Nil) 338 | 339 | private def matMul[T: ClassTag]( 340 | a: Array[Array[T]], 341 | b: Array[Array[T]] 342 | )(using n: Numeric[T]): Array[Array[T]] = 343 | assert( 344 | a.head.length == b.length, 345 | s"The number of columns in the first matrix should be equal to the number of rows in the second, ${a.head.length} != ${b.length}" 346 | ) 347 | val rows = a.length 348 | val cols = colsCount(b) 349 | val res = Array.ofDim[T](rows, cols) 350 | 351 | for i <- (0 until rows).indices do 352 | for j <- (0 until cols).indices do 353 | var sum = n.zero 354 | for k <- b.indices do 355 | sum = sum + (a(i)(k) * b(k)(j)) 356 | res(i)(j) = sum 357 | res 358 | 359 | def as0D[T: ClassTag](t: Tensor[T]): Tensor0D[T] = 360 | t match 361 | case Tensor0D(data) => Tensor0D(data) 362 | case t1 @ Tensor1D(data) => Tensor0D(data.head) 363 | case Tensor2D(data) => Tensor0D(data.head.head) 364 | case _ => notImplementedError(t :: Nil) 365 | 366 | def as1D[T: ClassTag](t: Tensor[T]): Tensor1D[T] = 367 | t match 368 | case Tensor0D(data) => Tensor1D(data) 369 | case t1 @ Tensor1D(_) => t1 370 | case Tensor2D(data) => Tensor1D(data.flatten) 371 | case _ => notImplementedError(t :: Nil) 372 | 373 | def as2D[T: ClassTag](t: Tensor[T]): Tensor2D[T] = 374 | t match 375 | case Tensor0D(data) => Tensor2D(Array(Array(data))) 376 | case Tensor1D(data) => Tensor2D(data.map(Array(_))) 377 | case t1 @ Tensor2D(_) => t1 378 | case t1 @ Tensor4D(data) => Tensor2D(data.map(_.map(_.flatten).flatten)) 379 | case _ => notImplementedError(t :: Nil) 380 | 381 | def as3D[T: ClassTag](t: Tensor[T]): Tensor3D[T] = 382 | t match 383 | case Tensor0D(data) => Tensor3D(Array(Array(Array(data)))) 384 | case Tensor2D(data) => Tensor3D(Array(data)) 385 | case t1 @ Tensor3D(_) => t1 386 | case _ => notImplementedError(t :: Nil) 387 | 388 | def as4D[T: ClassTag](t: Tensor[T]): Tensor4D[T] = 389 | t match 390 | case Tensor0D(data) => Tensor4D(Array(Array(Array(Array(data))))) 391 | case Tensor1D(data) => Tensor4D(Array(Array(data.map(Array(_))))) 392 | case t2 @ Tensor2D(_) => Tensor4D(Array(Array(t2.data))) 393 | case t1 @ Tensor4D(_) => t1 394 | case _ => notImplementedError(t :: Nil) 395 | 396 | def sum[T: Numeric: ClassTag](t: Tensor[T]): T = 397 | t match 398 | case Tensor0D(data) => data 399 | case Tensor1D(data) => data.sum 400 | case Tensor2D(data) => data.map(_.sum).sum 401 | case Tensor4D(data) => data.map(_.map(_.map(_.sum).sum).sum).sum 402 | case _ => notImplementedError(t :: Nil) 403 | 404 | def sumRows[T: Numeric: ClassTag](t: Tensor[T]): Tensor[T] = 405 | t match 406 | case Tensor0D(_) => t 407 | case Tensor1D(_) => t 408 | case Tensor2D(data) => 409 | Tensor1D(data.reduce((a, b) => a.lazyZip(b).map(_ + _).toArray)) 410 | case _ => notImplementedError(t :: Nil) 411 | 412 | def sumCols[T: Numeric: ClassTag](t: Tensor[T]): Tensor[T] = 413 | t match 414 | case Tensor0D(_) 
=> t 415 | case Tensor1D(data) => Tensor0D(data.sum) 416 | case Tensor2D(data) => Tensor2D(data.map(a => Array(a.sum))) 417 | case _ => notImplementedError(t :: Nil) 418 | 419 | def transpose[T: ClassTag](t: Tensor[T]): Tensor[T] = 420 | t match 421 | case t2 @ Tensor2D(data) => 422 | val (rows, cols) = t2.shape2D 423 | val transposed = Array.ofDim[T](cols, rows) 424 | 425 | for i <- (0 until rows).indices do 426 | for j <- (0 until cols).indices do 427 | transposed(j)(i) = data(i)(j) 428 | Tensor2D(transposed) 429 | case Tensor1D(data) => Tensor2D(asColumn(data)) 430 | case _ => t 431 | 432 | def split[T: ClassTag]( 433 | fraction: Float, 434 | t: Tensor[T] 435 | ): (Tensor[T], Tensor[T]) = 436 | t match 437 | case Tensor0D(_) => (t, t) 438 | case Tensor1D(data) => 439 | val (l, r) = splitArray(fraction, data) 440 | (Tensor1D(l), Tensor1D(r)) 441 | case Tensor2D(data) => 442 | val (l, r) = splitArray(fraction, data) 443 | (Tensor2D(l), Tensor2D(r)) 444 | case _ => notImplementedError(t :: Nil) 445 | 446 | private def splitArray[T]( 447 | fraction: Float, 448 | data: Array[T] 449 | ): (Array[T], Array[T]) = 450 | val count = data.length * fraction 451 | val countOrZero = if count < 1 then 0 else count 452 | data.splitAt(data.length - countOrZero.toInt) 453 | 454 | def split[T: ClassTag]( 455 | fraction: Float, 456 | t: (Tensor[T], Tensor[T]) 457 | ): ((Tensor[T], Tensor[T]), (Tensor[T], Tensor[T])) = 458 | val (l, r) = t 459 | assert(l.length == r.length, s"Both tensors must have the same length, ${l.length} != ${r.length}") 460 | split(fraction, l) -> split(fraction, r) 461 | 462 | def multiply[T: Numeric: ClassTag]( 463 | t1: Tensor[T], 464 | t2: Tensor[T] 465 | ): Tensor[T] = 466 | assert( 467 | t1.length == t2.length, 468 | s"Both vectors must have the same length, ${t1.length} != ${t2.length}" 469 | ) 470 | (t1, t2) match 471 | case (Tensor1D(data), Tensor1D(data2)) => 472 | Tensor1D(data.zip(data2).map((a, b) => a * b)) 473 | case (a @ Tensor2D(data), Tensor2D(data2)) => 474 | val (rows, cols) = a.shape2D 475 | val sum = Array.ofDim[T](rows, cols) 476 | for i <- 0 until rows do 477 | for j <- 0 until cols do 478 | sum(i)(j) = data(i)(j) * data2(i)(j) 479 | Tensor2D(sum) 480 | case (a, b) => sys.error(s"Not implemented for:\n$a\nand\n$b") 481 | 482 | def optMultiply[T: Numeric: ClassTag]( 483 | t1: Tensor[T], t2: Option[Tensor[T]] 484 | ): Tensor[T] = 485 | t2.fold(t1)(a => multiply(t1, a)) 486 | 487 | def batches[T: ClassTag: Numeric]( 488 | t: Tensor[T], 489 | batchSize: Int 490 | ): Iterator[Tensor[T]] = 491 | t match 492 | case Tensor0D(data) => Iterator(t) 493 | case Tensor1D(data) => data.grouped(batchSize).map(Tensor1D(_)) 494 | case Tensor2D(data) => data.grouped(batchSize).map(Tensor2D(_)) 495 | case Tensor3D(data) => data.grouped(batchSize).map(Tensor3D(_)) 496 | case Tensor4D(data) => data.grouped(batchSize).map(Tensor4D(_)) 497 | 498 | def equalRows[T: ClassTag](t1: Tensor[T], t2: Tensor[T]): Int = 499 | assert(t1.shape == t2.shape, sys.error(s"Tensors must have the same shape: ${t1.shape} != ${t2.shape}")) 500 | (t1, t2) match 501 | case (Tensor0D(data), Tensor0D(data2)) => 502 | if data == data2 then 1 else 0 503 | case (Tensor1D(data), Tensor1D(data2)) => 504 | data.zip(data2).count(_ == _) 505 | case (Tensor2D(data), Tensor2D(data2)) => 506 | data.zip(data2).foldLeft(0) { case (acc, (a, b)) => if a.sameElements(b) then acc + 1 else acc } 507 | case _ => 508 | sys.error(s"Tensors must be the same dimension: ${t1.shape} != ${t2.shape}") 509 | 510 | def clipInRange[T: 
ClassTag](t: Tensor[T], min: T, max: T)(using n: Numeric[T]): Tensor[T] = 511 | def clipValue(v: T) = 512 | val vAbs = v.abs 513 | if vAbs > max then max 514 | else if vAbs < min then min 515 | else v 516 | 517 | map(t, clipValue) 518 | 519 | def clipByNorm[T: ClassTag](t: Tensor[T], norm: T)(using n: Fractional[T]): Tensor[T] = 520 | val l2norm = castFromTo[Double, T](math.sqrt(castFromTo[T, Double](sum(pow(t, 2))))) 521 | if l2norm > norm then 522 | map(t, v => n.times(v, norm) / l2norm) 523 | else t 524 | 525 | def div[T: ClassTag: Fractional](t1: Tensor[T], t2: Tensor[T]): Tensor[T] = 526 | (t1, t2) match 527 | // broadcasting 528 | case (Tensor2D(data), Tensor0D(data2)) => Tensor2D(data.map(_.map(_ / data2))) 529 | case (Tensor1D(data), Tensor0D(data2)) => Tensor1D(data.map(_ / data2)) 530 | case (Tensor4D(data), Tensor0D(data2)) => Tensor4D(data.map(_.map(_.map(_.map(_ / data2))))) 531 | 532 | case (Tensor0D(data), Tensor0D(data2)) => Tensor0D(data / data2) 533 | case (Tensor1D(data), Tensor1D(data2)) => Tensor1D(data.zip(data2).map(_ /_)) 534 | case (Tensor2D(data), Tensor2D(data2)) => 535 | Tensor2D(matrixDivMatrix(data, data2)) 536 | case (Tensor4D(data), Tensor4D(data2)) => 537 | val res = data.zip(data2).map { (cubes1, cubes2) => 538 | cubes1.zip(cubes2).map { (mat1, mat2) => 539 | matrixDivMatrix(mat1, mat2) 540 | } 541 | } 542 | Tensor4D(res) 543 | case _ => notImplementedError(t1 :: t2 :: Nil) 544 | 545 | private def matrixDivMatrix[T: ClassTag: Fractional](a: Array[Array[T]], b: Array[Array[T]]): Array[Array[T]] = 546 | a.zip(b).map((a, b) => a.zip(b).map(_ / _)) 547 | 548 | def sqrt[T: ClassTag: Numeric](t: Tensor[T]): Tensor[T] = 549 | map(t, v => castFromTo[Double, T](math.sqrt(castFromTo[T, Double](v)))) 550 | 551 | def pow[T: ClassTag](t: Tensor[T], to: Int)(using n: Numeric[T]): Tensor[T] = 552 | def powValue(v: T) = 553 | val res = math.pow(n.toDouble(v), to) 554 | castFromTo[Double, T](res) 555 | def powArray(a: Array[T]) = 556 | a.map(powValue) 557 | def powMatrix(a: Array[Array[T]]) = 558 | a.map(_.map(powValue)) 559 | 560 | t match 561 | case Tensor0D(data) => Tensor0D(powValue(data)) 562 | case Tensor1D(data) => Tensor1D(powArray(data)) 563 | case Tensor2D(data) => Tensor2D(powMatrix(data)) 564 | case Tensor4D(data) => Tensor4D(data.map(_.map(powMatrix))) 565 | case _ => notImplementedError(t :: Nil) 566 | 567 | def zero[T: ClassTag](t: Tensor[T])(using n: Numeric[T]): Tensor[T] = 568 | t match 569 | case Tensor0D(_) => Tensor0D(n.zero) 570 | case Tensor1D(data) => Tensor1D(Array.fill(data.length)(n.zero)) 571 | case t1 @ Tensor2D(_) => 572 | val (rows, cols) = t1.shape2D 573 | Tensor2D(Array.fill(rows, cols)(n.zero)) 574 | case t1 @ Tensor3D(_) => 575 | val (cubes, rows, cols) = t1.shape3D 576 | Tensor3D(Array.fill(cubes, rows, cols)(n.zero)) 577 | case t1 @ Tensor4D(_) => 578 | val (tensors, cubes, rows, cols) = t1.shape4D 579 | Tensor4D(Array.fill(tensors, cubes, rows, cols)(n.zero)) 580 | 581 | def col[T: ClassTag](data: Array[Array[T]], i: Int): Array[T] = 582 | val to = i + 1 583 | slice(data, None, Some(i, to)).flatMap(_.headOption) 584 | 585 | def slice[T: ClassTag]( 586 | data: Array[Array[T]], 587 | rows: Option[(Int, Int)] = None, 588 | cols: Option[(Int, Int)] = None 589 | ): Array[Array[T]] = 590 | (rows, cols) match 591 | case (Some((rowsFrom, rowsTo)), Some((colsFrom, colsTo))) => 592 | sliceArr(data, (rowsFrom, rowsTo)).map(a => 593 | sliceArr(a, (colsFrom, colsTo)) 594 | ) 595 | case (None, Some((colsFrom, colsTo))) => 596 | data.map(a => 
sliceArr(a, (colsFrom, colsTo))) 597 | case (Some((rowsFrom, rowsTo)), None) => 598 | sliceArr(data, (rowsFrom, rowsTo)) 599 | case _ => data 600 | 601 | def sliceArr[T: ClassTag]( 602 | data: Array[Array[T]], 603 | rows: (Int, Int), 604 | cols: (Int, Int) 605 | ): Array[Array[T]] = 606 | sliceArr(data, rows).map(a => 607 | sliceArr(a, cols) 608 | ) 609 | 610 | def sliceArr[T]( 611 | data: Array[T], 612 | range: (Int, Int) 613 | ): Array[T] = 614 | val (l, r) = range 615 | val from = if l < 0 then data.length + l else l 616 | val to = if r < 0 then data.length + r else if r == 0 then data.length else r 617 | data.slice(from, to) 618 | 619 | // returns max index per array 620 | // for 2D Tensor: returns an array of indices where every element is a max index for a specific row 621 | def argMax[T: ClassTag](t: Tensor[T])(using n: Numeric[T]) = 622 | def maxIndex(a: Array[T]) = 623 | n.fromInt(a.indices.maxBy(a)) 624 | 625 | t match 626 | case Tensor2D(data) => Tensor1D(data.map(maxIndex)) 627 | case Tensor1D(data) => Tensor0D(maxIndex(data)) 628 | case Tensor0D(_) => t 629 | case _ => notImplementedError(t :: Nil) 630 | 631 | def outer[T: ClassTag: Numeric](t1: Tensor[T], t2: Tensor[T]): Tensor[T] = 632 | def product(a: Array[T], b: Array[T]) = 633 | val res = Array.ofDim(a.length, b.length) 634 | for i <- 0 until a.length do 635 | for j <- 0 until b.length do 636 | res(i)(j) = a(i) * b(j) 637 | res 638 | 639 | (t1, t2) match 640 | case (Tensor0D(d), Tensor0D(d2)) => Tensor0D(d * d2) 641 | case (Tensor0D(d), _) => scalarMul(t2, d) 642 | case (Tensor1D(d), Tensor0D(d2)) => scalarMul(t1, d2) 643 | case (Tensor1D(d), Tensor1D(d2)) => Tensor2D(product(d, d2)) 644 | case (Tensor1D(d), Tensor2D(d2)) => Tensor2D(product(d, d2.flatten)) 645 | case (Tensor2D(d), Tensor0D(d2)) => scalarMul(t1, d2) 646 | case (Tensor2D(d), Tensor1D(d2)) => Tensor2D(product(d.flatten, d2)) 647 | case (Tensor2D(d), Tensor2D(d2)) => Tensor2D(product(d.flatten, d2.flatten)) 648 | case _ => notImplementedError(t1 :: t2 :: Nil) 649 | 650 | def flatten[T: ClassTag](t: Tensor[T]): Tensor[T] = 651 | t match 652 | case Tensor0D(_) => t 653 | case Tensor1D(_) => t 654 | case Tensor2D(data) => Tensor1D(data.flatten) 655 | case _ => notImplementedError(t :: Nil) 656 | 657 | def diag[T: ClassTag](t: Tensor[T])(using n: Numeric[T]): Tensor[T] = 658 | t match 659 | case Tensor0D(_) => t 660 | case Tensor1D(d) => 661 | val res = Array.ofDim(d.length, d.length) 662 | for i <- 0 until d.length do 663 | for j <- 0 until d.length do 664 | res(i)(j) = if i == j then d(i) else n.zero 665 | Tensor2D(res) 666 | case t2 @ Tensor2D(d) => 667 | val size = t2.shape.min 668 | val res = Array.ofDim(size) 669 | for i <- 0 until size do 670 | for j <- 0 until size if i == j do 671 | res(i) = d(i)(j) 672 | Tensor1D(res) 673 | case _ => notImplementedError(t :: Nil) 674 | 675 | def max[T: ClassTag: Numeric](t: Tensor[T]): T = 676 | t match 677 | case Tensor0D(d) => d 678 | case Tensor1D(d) => d.max 679 | case Tensor2D(d) => d.map(_.max).max 680 | case Tensor3D(d) => d.map(_.map(_.max).max).max 681 | case Tensor4D(d) => d.map(_.map(_.map(_.max).max).max).max 682 | 683 | def reshape[T: ClassTag: Numeric](t: Tensor[T], shape: List[Int]): Tensor[T] = 684 | shape match 685 | case cubes :: rows :: cols :: _ => t match 686 | case Tensor2D(data) => 687 | Tensor4D(data.flatMap(_.grouped(cols).toArray.grouped(rows).toArray.grouped(cubes).toArray)) 688 | case _ => t 689 | case _ => t 690 | 691 | 692 | 
--------------------------------------------------------------------------------
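Usage note (not part of the repository sources): the tensor extension methods defined in src/main/scala/ml/tensors/ops.scala can be exercised directly from a REPL or an Ammonite script such as plots.sc. A minimal sketch, using only operations visible above (`as2D`, `*` for the dot product, `|*|` for the Hadamard product, `+` with shape checking, `.T` for transpose); the numeric values are illustrative only:

import ml.tensors.api._
import ml.tensors.ops._

val a = Array(Array(1d, 2d), Array(3d, 4d)).as2D   // 2x2 matrix lifted to Tensor2D[Double]
val b = Array(Array(5d, 6d), Array(7d, 8d)).as2D

val dot      = a * b        // matrix multiplication (TensorOps.mul)
val hadamard = a |*| b      // element-wise product (TensorOps.multiply)
val sum      = a + b        // element-wise sum; shapes must match
val at       = a.T          // transpose
val batches  = a.batches(1) // iterator over row batches of size 1

println(dot.shape)          // List(2, 2)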
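In the same spirit, an OptimizerCfg can be assembled by hand and handed to a Sequential model via its withCfg method. The sketch below uses only the constructors defined in optimizers.scala above; the learning rate and clipping thresholds are arbitrary example values, not defaults taken from the repository:

import ml.network._

// Adam hyper-parameters with the library defaults (b1 = 0.9, b2 = 0.999, eps = 10E-8)
val adamCfg = AdamCfg.default[Double]

// optimizer configuration with value-based gradient clipping into [-5.0, 5.0]
val cfg = OptimizerCfg[Double](
  learningRate = 0.001,
  clip = GradientClippingApi.clipByValue(5.0),
  adam = adamCfg
)

// clipByNorm is the alternative: rescale a gradient tensor when its L2 norm exceeds the threshold
val byNorm: GradientClipping[Double] = GradientClippingApi.clipByNorm(1.0)

A model configured this way picks up the clipping function inside both the Adam and StandardGD updateWeights implementations, since each of them applies cfg.clip to the weight and bias gradients before scaling by the batch size and the learning rate.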