├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── build.sbt
├── fp4ml-main
├── README.md
├── build.sbt
└── src
│ ├── main
│ └── scala
│ │ └── mlbigbook
│ │ ├── app
│ │ └── Exp20NG.scala
│ │ ├── math
│ │ ├── Argmax.scala
│ │ ├── Argmin.scala
│ │ ├── BaseMathVecOps.scala
│ │ ├── Dense.scala
│ │ ├── MathVectorOps.scala
│ │ ├── NumericConversion.scala
│ │ ├── RandoMut.scala
│ │ ├── Sparse.scala
│ │ ├── Val.scala
│ │ ├── VectorOps.scala
│ │ └── package.scala
│ │ ├── ml
│ │ ├── ClassificationModule.scala
│ │ ├── ClusteringConf.scala
│ │ ├── ClusteringModule.scala
│ │ ├── CustomHashMap.scala
│ │ ├── Hashable.scala
│ │ ├── ItemNumVecModule.scala
│ │ ├── Kmeans.scala
│ │ ├── KnnClassifier.scala
│ │ ├── NearestNeighbors.scala
│ │ ├── OLD_KnnClassifier.scala
│ │ └── RankingModule.scala
│ │ └── util
│ │ └── package.scala
│ └── test
│ ├── resources
│ └── log4j.properties
│ └── scala
│ └── mlbigbook
│ ├── math
│ ├── AbstractMathVectorOpsT.scala
│ ├── AbstractMvoFractionalT.scala
│ ├── MathVectorOpsDenseDoubleTest.scala
│ ├── MathVectorOpsDenseFloatTest.scala
│ ├── MathVectorOpsDenseIntTest.scala
│ ├── MathVectorOpsDenseLongTest.scala
│ ├── MathVectorOpsSparseDoubleTest.scala
│ ├── MathVectorOpsSparseFloatTest.scala
│ ├── MathVectorOpsSparseIntTest.scala
│ └── MathVectorOpsSparseLongTest.scala
│ └── ml
│ ├── AddressData.scala
│ ├── KmeansTest.scala
│ ├── KnnClassifierTest.scala
│ ├── KnnLshClassifierTest.scala
│ ├── NearestNeighborsLSHTest.scala
│ ├── NearestNeighborsTest.scala
│ └── OLDKMeansTest.scala
├── fp4ml-spark
├── README.md
├── build.sbt
└── src
│ └── main
│ └── scala
│ └── TODO
└── project
├── SharedBuild.scala
├── build.properties
└── plugins.sbt
/.gitignore:
--------------------------------------------------------------------------------
1 | index-*.html
2 | src/main/resources/tessdata
3 | LOG_*
4 | OUTPUT_*
5 |
6 | *.class
7 | *.log
8 | *.swp
9 | project/target
10 | .idea*
11 | *.DS_Store
12 |
13 | # sbt specific
14 | .cache/
15 | .history/
16 | .lib/
17 | dist/*
18 | target/
19 | lib_managed/
20 | src_managed/
21 | project/boot/
22 | project/plugins/project/
23 |
24 | # Scala-IDE specific
25 | .scala_dependencies
26 | .worksheet
27 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: scala
2 | scala:
3 | - 2.11.8
4 | jdk:
5 | - oraclejdk8
6 | script: "sbt test"
7 | after_success: "sbt coverage test coveralls"
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright [2016] [Malcolm W. Greaves]
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. Copyright [yyyy] [name of copyright owner]
14 |
15 | Licensed under the Apache License, Version 2.0 (the "License");
16 | you may not use this file except in compliance with the License.
17 | You may obtain a copy of the License at
18 |
19 | http://www.apache.org/licenses/LICENSE-2.0
20 |
21 | Unless required by applicable law or agreed to in writing, software
22 | distributed under the License is distributed on an "AS IS" BASIS,
23 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 | See the License for the specific language governing permissions and
25 | limitations under the License.
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # fp4ml
2 | [](https://travis-ci.org/malcolmgreaves/fp4ml) [](https://coveralls.io/github/malcolmgreaves/fp4ml?branch=master)
3 | [](https://www.codacy.com/app/greavesmalcolm/fp4ml) [](https://waffle.io/malcolmgreaves/fp4ml) [](https://gitter.im/malcolmgreaves/fp4ml?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [](https://maven-badges.herokuapp.com/maven-central/io.malcolmgreaves/fp4ml-scala_2.11)
4 |
5 | Machine learning for functional programmers.
6 |
7 | # Project Structure
8 |
9 | This repository is split into subprojects:
10 |
11 | * [fp4ml-main](https://github.com/malcolmgreaves/fp4ml/tree/master/fp4ml-main)
12 | * The meat and potatoes of the fp4ml project. Includes:
13 | * learning algorithms
14 | * abstractions
15 | * data structures
16 | * experiment frameworks
17 | * evaluation metrics
18 | * model definitions, formats
19 | * Depends on 3rd party libraries including:
20 | * [`data-tc`](https://github.com/malcolmgreaves/data-tc)
21 | * `shapeless`
22 | * `spire`
23 |
24 | * [fp4ml-spark](https://github.com/malcolmgreaves/fp4ml/tree/master/fp4ml-spark)
25 | * An extension of `fp4ml-main` to use elements from the Apache Spark ecosystem.
26 |
27 | # Legal
28 |
29 | The original author retains copyright over all material contained within this repository. Use of this code is governed under the terms of the Apache 2.0 open source software license. See the [LICENSE](./LICENSE) file for more details.
30 |
31 |
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | name := "fp4ml"
2 | organization in ThisBuild := "io.malcolmgreaves"
3 | version in ThisBuild := {
4 | val major: Int = 0
5 | val minor: Int = 0
6 | val patch: Int = 0
7 | s"$major.$minor.$patch"
8 | }
9 |
10 | import SharedBuild._
11 |
12 | lazy val root = project
13 | .in(file("."))
14 | .aggregate(
15 | `fp4ml-main`,
16 | `fp4ml-spark`
17 | )
18 | .settings {
19 | publishArtifact := false
20 | publishLocal := {}
21 | publish := {}
22 | }
23 |
24 | lazy val `fp4ml-main` = project.in(file("fp4ml-main")).settings {
25 | publishArtifact := true
26 | }
27 |
28 | lazy val `fp4ml-spark` =
29 | project.in(file("fp4ml-spark")).dependsOn(`fp4ml-main`).settings {
30 | publishArtifact := true
31 | }
32 |
33 | lazy val subprojects: Seq[ProjectReference] = root.aggregate
34 | lazy val publishTasks = subprojects.map { publish.in }
35 |
36 | resolvers in ThisBuild := Seq(
37 | // sonatype, maven central
38 | "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/",
39 | "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/",
40 | // bintray
41 | "Scalaz Bintray" at "http://dl.bintray.com/scalaz/releases",
42 | Resolver.bintrayRepo("mfglabs", "maven"),
43 | Resolver.bintrayRepo("dwhjames", "maven"),
44 | // etc.
45 | "Confluent" at "http://packages.confluent.io/maven/"
46 | )
47 |
48 | lazy val javaV = "1.8"
49 | scalaVersion in ThisBuild := "2.11.8"
50 | scalacOptions in ThisBuild := Seq(
51 | "-optimize",
52 | "-deprecation",
53 | "-feature",
54 | "-unchecked",
55 | s"-target:jvm-$javaV",
56 | "-encoding",
57 | "utf8",
58 | "-language:postfixOps",
59 | "-language:existentials",
60 | "-language:higherKinds",
61 | "-language:implicitConversions",
62 | "-language:experimental.macros",
63 | "-language:reflectiveCalls",
64 | "-Yno-adapted-args",
65 | "-Ywarn-value-discard",
66 | "-Yinline-warnings",
67 | "-Xlint",
68 | "-Xfuture",
69 | "-Ywarn-dead-code",
70 |   "-Xfatal-warnings" // Every warning is escalated to an error.
71 | )
72 | javacOptions in ThisBuild := Seq("-source", javaV, "-target", javaV)
73 | javaOptions in ThisBuild := Seq(
74 | "-server",
75 | "-XX:+AggressiveOpts",
76 | "-XX:+TieredCompilation",
77 | "-XX:CompileThreshold=100",
78 | "-Xmx3000M",
79 | "-XX:+UseG1GC"
80 | )
81 |
82 | publishArtifact := false
83 |
--------------------------------------------------------------------------------
/fp4ml-main/README.md:
--------------------------------------------------------------------------------
1 | # fp4ml-main
2 | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/io.malcolmgreaves/fp4ml-main_2.11/badge.svg?style=plastic)](https://maven-badges.herokuapp.com/maven-central/io.malcolmgreaves/fp4ml-main_2.11)
3 | Machine learning for functional programmers.
4 | A library of machine learning algorithms. Implemented from the get-go using principles of functional programming, all algorithm APIs are referentially transparent and stateless. Additionally, algoritghms are implemented in a maximally general way, with the objective to "write once, run anywhere".
--------------------------------------------------------------------------------
/fp4ml-main/build.sbt:
--------------------------------------------------------------------------------
1 | name := "fp4ml-main"
2 |
3 | import SharedBuild._
4 |
5 | addCompilerPlugin(scalaMacros)
6 |
7 | libraryDependencies ++=
8 | fp4mlMainDeps ++
9 | testDeps
10 |
11 | //
12 | // test, runtime settings
13 | //
14 | fork in run := true
15 | fork in Test := true
16 | parallelExecution in Test := true
17 |
18 | pomExtra := pomExtraInfo
19 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/app/Exp20NG.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.app
2 |
3 | import java.io.{FileReader, BufferedReader, File}
4 | import java.nio.charset.Charset
5 | import java.nio.file.Files
6 |
7 | import breeze.linalg.{SparseVector, DenseVector}
8 | import mlbigbook.math.MathVectorOps
9 | import mlbigbook.ml.{ImplicitHashable, KnnClassifier}
10 |
11 | import scala.io.Source
12 | import scala.util.Random
13 |
14 | object Exp20NG extends App {
15 |
16 | lazy val normalizeLine: String => String =
17 | s => s.trim.toLowerCase
18 |
19 | lazy val filterLine: String => Boolean =
20 | s =>
21 | s.nonEmpty &&
22 | headerPrefixes.forall { !s.startsWith(_) } &&
23 | headerSuffixes.forall { !s.endsWith(_) }
24 |
25 | lazy val labelTransform: String => String =
26 | label => {
27 | val i = label.indexOf(".")
28 | if (i >= 0)
29 | label.substring(0, i)
30 | else
31 | label
32 | }
33 |
34 | lazy val headerPrefixes: Seq[String] =
35 | """
36 | |Xref:
37 | |Path:
38 | |From:
39 | |Newsgroups:
40 | |Subject:
41 | |Summary:
42 | |Keywords:
43 | |Message-ID:
44 | |Date:
45 | |Expires:
46 | |Followup-To:
47 | |Distribution:
48 | |Organization:
49 | |Approved:
50 | |Supersedes:
51 | |Lines:
52 | |Archive-name:
53 | |Alt-atheism-archive-name:
54 | |Last-modified:
55 | |Version:
56 | |-----BEGIN PGP SIGNED MESSAGE-----
57 | |In article
58 | |From article
59 | |>
60 | |>>
61 | |References:
62 | |Email:
63 | |Sender:
64 | |NNTP-posting-host
65 | |NNTP-posting-user
66 | |--
67 | |: >:
68 | |: >
69 | | >
70 | |:
71 | |<
72 | """.stripMargin.trim.toLowerCase.split { "\n" }.toSeq
73 |
74 | lazy val headerSuffixes: Seq[String] =
75 | """
76 | |writes:
77 | |.com
78 | """.stripMargin.trim.toLowerCase.split { "\n" }.toSeq
79 |
80 | lazy val ngDirectory = new File("./20_newsgroups")
81 | println(
82 | s"Loading 20 Newsgroup Data from:\n${ngDirectory.getCanonicalPath}\n")
83 |
84 | import scala.collection.JavaConverters._
85 | lazy val loadNgFi: File => Seq[String] =
86 | fi =>
87 | if (fi isFile) {
88 | val br = new BufferedReader(new FileReader(fi))
89 | val buf = new scala.collection.mutable.ArrayBuffer[String](420)
90 | var line: String = br.readLine()
91 | while (line != null) {
92 | buf.append(line)
93 | line = br.readLine()
94 | }
95 | buf.toSeq
96 | }.map { normalizeLine }.filter { filterLine } else
97 | Seq.empty
98 |
99 | lazy val loadNgData: File => Seq[(File, Seq[String])] =
100 | f => {
101 | if (f.isDirectory) {
102 | Option(f.listFiles()).map { _.toSeq }.getOrElse { Seq.empty }.flatMap {
103 | loadNgData
104 | }
105 |
106 | } else if (f.isFile)
107 | Seq((f, loadNgFi(f)))
108 | else
109 | Seq.empty
110 | }
111 |
112 | // // // // // // // // // // // // // // // // // // // // // // // // // //
113 | //
114 | // S C R I P T
115 | //
116 | // // // // // // // // // // // // // // // // // // // // // // // // // //
117 |
118 | lazy val ng20 = ngDirectory.listFiles.filter(_ != null).toSeq
119 | println(s"There are ${ng20.size} newsgroup directories")
120 |
121 | val newsgroup2fileandcontent =
122 | ng20.map { ngDir =>
123 | println(s"loading data from the ${ngDir.getName} newsgroup ... ")
124 | val bothFiLines = loadNgData(ngDir)
125 | (ngDir.getName, bothFiLines)
126 | }.toMap
127 |
128 | type Document = String
129 |
130 | import ImplicitHashable._
131 | import fif.ImplicitCollectionsData._
132 | lazy val knn = KnnClassifier[Document, String, Float, SparseVector](
133 | MathVectorOps.Implicits.FloatSparseVot,
134 | representsNoLabel = ""
135 | )
136 |
137 | val stringVectorizer: knn.Vectorizer = new {
138 |
139 | val word2index: Map[String, Int] = {
140 | val words = {
141 | for {
142 | (_, data) <- newsgroup2fileandcontent
143 | (_, lines) <- data
144 | line <- lines
145 | word <- line.split(" ")
146 | } yield word
147 | }.toSet
148 |
149 | println(s"There are ${words.size} unique words")
150 |
151 | words.zipWithIndex.toMap
152 | }
153 |
154 | lazy val vectorize = (s: Document) =>
155 | SparseVector[Float](word2index.size)({
156 | val bothIndexValue = s.split(" ").foldLeft(Map.empty[Int, Float]) {
157 | case (accum, word) =>
158 | if (word2index contains word) {
159 | val index = word2index(word)
160 | if (accum.contains(index))
161 | (accum - index) + (index -> (accum(index) + 1.0f))
162 | else
163 | accum + (index -> 1.0f)
164 |
165 | } else
166 | accum
167 | }
168 |
169 | bothIndexValue.map {
170 | case (index, count) => (index, math.log(count).toFloat)
171 | }.toSeq
172 | }: _*)
173 |
174 | lazy val nDimensions = word2index.size
175 | }
176 |
177 | val distance: knn.Distance = (v1, v2) => {
178 | val r = knn.vops.subV(v1, v2)
179 | knn.vops.dot(r, r)
180 | }
181 |
182 | val allLabeledData: Seq[(Document, String)] = for {
183 | (ng, bothFiAndData) <- newsgroup2fileandcontent.toSeq
184 | (_, lines) <- bothFiAndData
185 | } yield (lines.mkString("\n"), labelTransform(ng))
186 |
187 | println(s"total labeled data size: ${allLabeledData.size}")
188 |
189 | val (train, test): (Seq[(Document, String)], Seq[(Document, String)]) = {
190 |
191 | val shuffled: Seq[(Document, String)] = allLabeledData.map { x =>
192 | (x, math.random)
193 | }.sortBy { case (_, rando) => rando }.map { case (x, _) => x }
194 |
195 | val si = (shuffled.size * .9).toInt
196 |
197 | (
198 | shuffled.slice(0, si),
199 | shuffled.slice(si + 1, shuffled.size)
200 | )
201 | }
202 |
203 | println(s"building kNN on ${train.size} examples")
204 | val classifier = knn.train((5, distance), stringVectorizer)(train)
205 |
206 | val nTake = 25
207 | println(s"grabbing $nTake random test example (from ${test.size} documents)")
208 |
209 | var nCorrect = 0
210 | test.take(nTake).foreach {
211 | case (testDoc, testLabel) =>
212 | val predicted = classifier(testDoc)
213 | println(s"predicted: $predicted actual: $testLabel")
214 | if (predicted == testLabel)
215 | nCorrect += 1
216 | }
217 | println(
218 | s"\n\nAccuracy: $nCorrect / $nTake = ${(nCorrect.toFloat / nTake.toFloat) * 100.0f} %")
219 | }
220 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/Argmax.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import fif.Data
4 |
5 | import scala.reflect.ClassTag
6 |
7 | /**
8 | * Generic algorithm for finding the maximal argument. Uses the `Val`
9 | * type class as evidence of an argument's value.
10 | */
11 | object Argmax {
12 |
13 | import Data.ops._
14 |
15 | /**
16 | * Finds the maximal argument of `elements` in linear time. Uses the `Val`
17 | * type class as evidence of an argument's value.
18 | *
19 | * throws IllegalArgumentException Iff `elements` is empty.
20 | */
21 | def apply[T: Val: ClassTag, D[_]: Data](elements: D[T]): Option[T] =
22 | if (elements isEmpty)
23 | None
24 | else
25 | Some(applyUnsafe(elements))
26 |
27 | def applyUnsafe[T: Val: ClassTag, D[_]: Data](elements: D[T]): T = {
28 | val v = Val[T]
29 | elements.reduce {
30 | case (a, b) =>
31 | if (v.n.lt(v.valueOf(a), v.valueOf(b)))
32 | a
33 | else
34 | b
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/Argmin.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import fif.Data
4 |
5 | import scala.reflect.ClassTag
6 |
7 | object Argmin {
8 |
9 | def apply[T: Val: ClassTag, D[_]: Data](elements: D[T]): Option[T] =
10 | Argmax(elements)(
11 | Val.inverse,
12 | implicitly[ClassTag[T]],
13 | implicitly[Data[D]]
14 | )
15 |
16 | def applyUnsafe[T: Val: ClassTag, D[_]: Data](elements: D[T]): T =
17 | Argmax.applyUnsafe(elements)(
18 | Val.inverse,
19 | implicitly[ClassTag[T]],
20 | implicitly[Data[D]]
21 | )
22 | }
23 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/BaseMathVecOps.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.math.Semiring
4 | import breeze.storage.Zero
5 |
6 | import scala.language.higherKinds
7 |
8 | private[math] abstract class BaseMathVecOps[Num, V[_]](
9 | implicit no: Fractional[Num],
10 | zo: Zero[Num],
11 | so: Semiring[Num]
12 | ) extends MathVectorOps[V] {
13 |
14 | final override type N = Num
15 |
16 | override final implicit lazy val n = no
17 | override final implicit lazy val z = zo
18 | override final implicit lazy val s = so
19 |
20 | }
21 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/Dense.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.math.Semiring
4 | import breeze.linalg.{Vector, DenseVector}
5 | import breeze.linalg.operators._
6 | import breeze.storage.Zero
7 | import mlbigbook.util
8 | import spire.syntax.cfor._
9 |
10 | import scala.language.{higherKinds, implicitConversions}
11 | import scala.reflect.ClassTag
12 |
13 | /**
14 | * Base partial implementation for DenseVectors. Implements the MathVectorOps
15 |  * methods for the DenseVector type. Also defines the zeros, ones methods
16 | * of MathVectorOps.
17 | */
18 | protected abstract class Dense[
19 | @specialized Num: Fractional: Zero: Semiring: ClassTag]
20 | extends BaseMathVecOps[Num, DenseVector] {
21 |
22 | override final def foreach[A](v: DenseVector[A])(f: A => Any): Unit =
23 | v.foreach(f)
24 |
25 | override final def zeros(size: Int): DenseVector[N] =
26 | DenseVector.zeros[N](size)
27 |
28 | override final def ones(size: Int): DenseVector[N] =
29 | DenseVector.ones[N](size)(
30 | implicitly[ClassTag[N]],
31 | implicitly[Semiring[N]]
32 | )
33 |
34 | override final def fill[A: ClassTag: Zero](size: Int)(value: => A) =
35 | DenseVector.fill(size)(value)
36 |
37 | override final def toSeq[A: ClassTag](v: DenseVector[A]): Seq[A] =
38 | util.copyToSeq(v.toArray)
39 |
40 | override final def size(v: DenseVector[_]): Int =
41 | v.length
42 |
43 | override final def apply[A](v: DenseVector[A])(index: Int): A =
44 | v(index)
45 |
46 | override final def map[B: ClassTag: Fractional: Zero](v: DenseVector[N])(
47 | f: N => B): DenseVector[B] =
48 | v.map(f)
49 |
50 | override final def reduce[B >: N: ClassTag](v: DenseVector[N])(r: (B,
51 | B) => B) =
52 | v.reduceLeft(r)
53 |
54 | override final def fold[B: ClassTag](v: DenseVector[N])(zero: B)(
55 | combine: (B, N) => B) =
56 | v.valuesIterator.foldLeft(zero)(combine)
57 |
58 | override final def copy(v: DenseVector[N]) = {
59 | val src = v.toArray
60 | val size = src.length
61 | val cpy = new Array[N](size)
62 | System.arraycopy(src, 0, cpy, 0, size)
63 | DenseVector(cpy)
64 | }
65 | }
66 |
67 | /**
68 | * Implementation for DenseVector[Double].
69 | */
70 | object DoubleDenseMathVector extends Dense[Double] {
71 | override val addV = DenseVector.dv_dv_Op_Double_OpAdd
72 | override val addS = DenseVector.dv_s_Op_Double_OpAdd
73 | override val subV = DenseVector.dv_dv_Op_Double_OpSub
74 | override val subS = DenseVector.dv_s_Op_Double_OpSub
75 | override val dot =
76 | new OpMulInner.Impl2[DenseVector[Double], DenseVector[Double], Double] {
77 |
78 | def apply(a: DenseVector[Double], b: DenseVector[Double]) = {
79 | require(b.length == a.length, "Vectors must be the same length!")
80 | // val boff =
81 | // if (b.stride >= 0) b.offset
82 | // else b.offset + b.stride * (b.length - 1)
83 | // val aoff =
84 | // if (a.stride >= 0) a.offset
85 | // else a.offset + a.stride * (a.length - 1)
86 | // BLAS.getInstance().sdot(
87 | // a.length, b.data, boff, b.stride, a.data, aoff, a.stride
88 | // )
89 | // TODO : Do we need to take into consideration ({a,b}.{stride,offset})
90 | // into account here?
91 | var agg = 0.0
92 | cfor(0)(_ < a.length, _ + 1) { i =>
93 | agg += a(i) * b(i)
94 | }
95 | agg
96 | }
97 |
98 | implicitly[BinaryRegistry[Vector[Double],
99 | Vector[Double],
100 | OpMulInner.type,
101 | Double]].register(this)
102 | }
103 |
104 | override val divS = DenseVector.dv_s_Op_Double_OpDiv
105 | override val mulS = DenseVector.dv_s_Op_Double_OpMulScalar
106 | override val divV = DenseVector.dv_dv_Op_Double_OpDiv
107 | override val mulV = DenseVector.dv_dv_Op_Double_OpMulScalar
108 | }
109 |
110 | /**
111 | * Implementation for DenseVector[Float].
112 | */
113 | object FloatDenseMathVector extends Dense[Float] {
114 | override val addV = DenseVector.dv_dv_Op_Float_OpAdd
115 | override val addS = DenseVector.dv_s_Op_Float_OpAdd
116 | override val subV = DenseVector.dv_dv_Op_Float_OpSub
117 | override val subS = DenseVector.dv_s_Op_Float_OpSub
118 | override val dot = DenseVector.canDot_DV_DV_Float
119 | override val divS = DenseVector.dv_s_Op_Float_OpDiv
120 | override val mulS = DenseVector.dv_s_Op_Float_OpMulScalar
121 | override val divV = DenseVector.dv_dv_Op_Float_OpDiv
122 | override val mulV = DenseVector.dv_dv_Op_Float_OpMulScalar
123 | }
124 |
125 | /**
126 | * Implementation for DenseVector[Long].
127 | */
128 | object LongDenseMathVector extends Dense[Long] {
129 | override val addV = DenseVector.dv_dv_Op_Long_OpAdd
130 | override val addS = DenseVector.dv_s_Op_Long_OpAdd
131 | override val subV = DenseVector.dv_dv_Op_Long_OpSub
132 | override val subS = DenseVector.dv_s_Op_Long_OpSub
133 | override val dot = DenseVector.canDot_DV_DV_Long
134 | override val divS = DenseVector.dv_s_Op_Long_OpDiv
135 | override val mulS = DenseVector.dv_s_Op_Long_OpMulScalar
136 | override val divV = DenseVector.dv_dv_Op_Long_OpDiv
137 | override val mulV = DenseVector.dv_dv_Op_Long_OpMulScalar
138 | }
139 |
140 | /**
141 | * Implementation for DenseVector[Int].
142 | */
143 | object IntDenseMathVector extends Dense[Int] {
144 | override val addV = DenseVector.dv_dv_Op_Int_OpAdd
145 | override val addS = DenseVector.dv_s_Op_Int_OpAdd
146 | override val subV = DenseVector.dv_dv_Op_Int_OpSub
147 | override val subS = DenseVector.dv_s_Op_Int_OpSub
148 | override val dot = DenseVector.canDot_DV_DV_Int
149 | override val divS = DenseVector.dv_s_Op_Int_OpDiv
150 | override val mulS = DenseVector.dv_s_Op_Int_OpMulScalar
151 | override val divV = DenseVector.dv_dv_Op_Int_OpDiv
152 | override val mulV = DenseVector.dv_dv_Op_Int_OpMulScalar
153 | }
154 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/MathVectorOps.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.math.Semiring
4 | import breeze.linalg.operators._
5 | import breeze.storage.Zero
6 |
7 | import scala.language.{higherKinds, implicitConversions}
8 | import scala.reflect.ClassTag
9 |
10 | /**
11 | * An abstraction specifying operations one may perform using vectors and
12 | * scalar values. These operations include element-wise & scalar
13 | * multiplication, division, addition, and subtraction. Support for the dot
14 | * product of two vectors is also included. As well as methods to construct new
15 | * vector instances.
16 | */
17 | trait MathVectorOps[V[_]] extends VectorOps[V] {
18 |
19 | type N
20 | implicit val n: Fractional[N]
21 | implicit val z: Zero[N]
22 | implicit val s: Semiring[N]
23 |
24 | /**
25 | * Creates a new vector of the input size where each element has value 0.
26 | */
27 | def zeros(size: Int): V[N]
28 |
29 | /**
30 | * Creates a new vector of the input size where each element has value 1.
31 | */
32 | def ones(size: Int): V[N]
33 |
34 | protected lazy val zero = implicitly[Fractional[N]].zero
35 | protected lazy val one = implicitly[Fractional[N]].one
36 |
37 | /**
38 | * Change every element of a vector V using the function f.
39 | * No side effects.
40 | */
41 | def map[B: ClassTag: Fractional: Zero](v: V[N])(f: N => B): V[B]
42 |
43 | /**
44 | * Apply a binary combination operator, r, to pairs of elements from the
45 | * input vector, v. Note that the output of r shall be applied to both
46 | * vector elements as well as other, previous outputs from r. The order of
47 | * execution is not guaranteed. Therefore, it is important that r is
48 |  * associative and commutative.
49 | */
50 | def reduce[A1 >: N: ClassTag](v: V[N])(r: (A1, A1) => A1): A1
51 |
52 | /**
53 | * From the starting value, zero, applies the function combine to elements
54 | * of the input vector v. This method evaluates to the final accumulated
55 | * value of this operation across all elements of the vector. Execution
56 | * order is not guaranteed, so combine must be side-effect free,
57 |  * associative, and commutative.
58 | */
59 | def fold[B: ClassTag](v: V[N])(zero: B)(combine: (B, N) => B): B
60 |
61 | /**
62 | * Create a new vector of the input size where each element has the value v.
63 | */
64 | def fill[A: ClassTag: Zero](size: Int)(v: => A): V[A]
65 |
66 | /**
67 | * Performs a shallow copy of the vector's contents. Each element is copied
68 | * to a newly allocated vector of type V[N]. If N is a primitive or other
69 | * value type, then this will be a deep copy. Otherwise, the reference will
70 | * be copied.
71 | */
72 | def copy(v: V[N]): V[N]
73 |
74 | /**
75 | * Performs element-wise addition of two vectors.
76 | */
77 | val addV: OpAdd.Impl2[V[N], V[N], V[N]]
78 |
79 | /**
80 | * Adds a scalar to each element of a vector.
81 | */
82 | val addS: OpAdd.Impl2[V[N], N, V[N]]
83 |
84 | /**
85 | * Performs element-wise subtraction of two vectors.
86 | */
87 | val subV: OpSub.Impl2[V[N], V[N], V[N]]
88 |
89 | /**
90 | * Subtracts a scalar from each element of a vector.
91 | */
92 | val subS: OpSub.Impl2[V[N], N, V[N]]
93 |
94 | /**
95 | * Performs a dot product operation between two vectors,
96 | * which results in a scalar.
97 | */
98 | val dot: OpMulInner.Impl2[V[N], V[N], N]
99 |
100 | /**
101 | * Performs element-wise multiplication between two vectors.
102 | */
103 | val mulV: OpMulScalar.Impl2[V[N], V[N], V[N]]
104 |
105 | /**
106 | * Multiplies each vector element by a scalar.
107 | */
108 | val mulS: OpMulScalar.Impl2[V[N], N, V[N]]
109 |
110 | /**
111 | * Performs element-wise division between two vectors.
112 | */
113 | val divV: OpDiv.Impl2[V[N], V[N], V[N]]
114 |
115 | /**
116 | * Divides each vector element by a scalar.
117 | */
118 | val divS: OpDiv.Impl2[V[N], N, V[N]]
119 |
120 | }
121 |
122 | object MathVectorOps {
123 |
124 | type Type[Num, Vec[_]] = MathVectorOps[Vec] {
125 | type N = Num
126 | }
127 |
128 | object Implicits {
129 | // dense operations
130 | implicit val DoubleDenseVot = DoubleDenseMathVector
131 | implicit val FloatDenseVot = FloatDenseMathVector
132 | implicit val LongDenseVot = LongDenseMathVector
133 | implicit val IntDenseVot = IntDenseMathVector
134 | // sparse operations
135 | implicit val DoubleSparseVot = DoubleSparseMathVector
136 | implicit val FloatSparseVot = FloatSparseMathVector
137 | implicit val LongSparseVot = LongSparseMathVector
138 | implicit val IntSparseVot = IntSparseMathVector
139 | }
140 |
141 | }
142 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/NumericConversion.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import scala.reflect.ClassTag
4 |
5 | /**
6 | * Typeclass supporting conversions between primitive types, with the
7 | * constraint that the primitive has Numeric evidence.
8 | */
9 | sealed abstract class NumericConversion[@specialized N] {
10 |
11 | def fromInt(i: Int): N
12 |
13 | def fromLong(l: Long): N
14 |
15 | def fromDouble(d: Double): N
16 |
17 | def fromByte(b: Byte): N
18 |
19 | def fromShort(s: Short): N
20 |
21 | def fromFloat(f: Float): N
22 |
23 | implicit def ct: ClassTag[N]
24 | }
25 |
26 | object NumericConversion {
27 |
28 | def apply[N: NumericConversion]: NumericConversion[N] =
29 | implicitly[NumericConversion[N]]
30 |
31 | /**
32 | * Implicit NumericConversion instances for every primitive numeric type:
33 | * float, long, double, int, short, byte
34 | */
35 | object Implicits {
36 |
37 | implicit case object FloatC extends NumericConversion[Float] {
38 | override implicit val ct: ClassTag[Float] = ClassTag(classOf[Float])
39 | override def fromInt(l: Int) = l.toFloat
40 | override def fromLong(l: Long) = l.toFloat
41 | override def fromShort(s: Short) = s.toFloat
42 | override def fromByte(b: Byte) = b.toFloat
43 | override def fromDouble(d: Double) = d.toFloat
44 | override def fromFloat(f: Float) = f
45 | }
46 |
47 | implicit case object LongC extends NumericConversion[Long] {
48 | override implicit val ct: ClassTag[Long] = ClassTag(classOf[Long])
49 | override def fromInt(l: Int) = l.toLong
50 | override def fromLong(l: Long) = l
51 | override def fromShort(s: Short) = s.toLong
52 | override def fromByte(b: Byte) = b.toLong
53 | override def fromDouble(d: Double) = d.toLong
54 | override def fromFloat(f: Float) = f.toLong
55 | }
56 |
57 | implicit case object DoubleC extends NumericConversion[Double] {
58 | override implicit val ct: ClassTag[Double] = ClassTag(classOf[Double])
59 | override def fromInt(l: Int) = l.toDouble
60 | override def fromLong(l: Long): Double = l.toDouble
61 | override def fromShort(s: Short): Double = s.toDouble
62 | override def fromByte(b: Byte): Double = b.toDouble
63 | override def fromDouble(d: Double): Double = d
64 | override def fromFloat(f: Float): Double = f.toDouble
65 | }
66 |
67 | implicit case object IntC extends NumericConversion[Int] {
68 | override implicit val ct: ClassTag[Int] = ClassTag(classOf[Int])
69 | override def fromInt(l: Int) = l.toInt
70 | override def fromLong(l: Long) = l.toInt
71 | override def fromShort(s: Short) = s.toInt
72 | override def fromByte(b: Byte) = b.toInt
73 | override def fromDouble(d: Double) = d.toInt
74 | override def fromFloat(f: Float) = f.toInt
75 | }
76 |
77 | implicit case object ShortC extends NumericConversion[Short] {
78 | override implicit val ct: ClassTag[Short] = ClassTag(classOf[Short])
79 | override def fromInt(l: Int) = l.toShort
80 | override def fromLong(l: Long) = l.toShort
81 | override def fromShort(s: Short) = s
82 | override def fromByte(b: Byte) = b.toShort
83 | override def fromDouble(d: Double) = d.toShort
84 | override def fromFloat(f: Float) = f.toShort
85 | }
86 |
87 | implicit case object ByteC extends NumericConversion[Byte] {
88 | override implicit val ct: ClassTag[Byte] = ClassTag(classOf[Byte])
89 | override def fromInt(l: Int) = l.toByte
90 | override def fromLong(l: Long) = l.toByte
91 | override def fromShort(s: Short) = s.toByte
92 | override def fromByte(b: Byte) = b
93 | override def fromDouble(d: Double) = d.toByte
94 | override def fromFloat(f: Float) = f.toByte
95 | }
96 | }
97 | }
98 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/RandoMut.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import scala.util.Random
4 |
/** A mutable pseudo-random number generator producing values of type N.
  * "Mut" signals that calling `next()` advances internal generator state.
  */
abstract class RandoMut[N: Fractional] {
  /** Produces the next pseudo-random value, mutating internal state. */
  def next(): N
}
8 |
object RandoMut {

  /** A factory that seeds a brand-new generator (seed taken from
    * `Random.nextLong()`) each time it is invoked.
    */
  def newSeedPerCall[N: Fractional: NumericConversion]: () => RandoMut[N] =
    () => fromSeed(Random.nextLong())

  /** Builds a generator for N backed by a `scala.util.Random` with the given
    * seed; every `next()` draws a uniform Double from that source and
    * converts it to N via the NumericConversion evidence.
    */
  def fromSeed[N: Fractional: NumericConversion](seed: Long): RandoMut[N] =
    new RandoMut[N] {
      private[this] val source = new Random(seed)
      override def next() = NumericConversion[N].fromDouble(source.nextDouble())
    }

}
22 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/Sparse.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.SparseVector
4 | import breeze.linalg.operators._
5 | import breeze.math.Semiring
6 | import breeze.storage.Zero
7 | import spire.syntax.cfor._
8 |
9 | import scala.language.higherKinds
10 | import scala.reflect.ClassTag
11 |
/**
 * Base partial implementation for SparseVectors. Implements the MathVectorOps
 * methods for the SparseVector type. Also defines the zeros, ones, and fill
 * methods of MathVectorOps.
 */
protected abstract class Sparse[
    @specialized Num: Fractional: Zero: Semiring: ClassTag]
    extends BaseMathVecOps[Num, SparseVector] {

  // Applies f to each element, for its side effects only.
  override final def foreach[A](v: SparseVector[A])(f: A => Any) =
    v.foreach(f)

  // All-zero vector; sparse, so no per-element storage is allocated.
  override final def zeros(size: Int) =
    SparseVector.zeros[N](size)

  // Vector with the module's `one` value at every index.
  override final def ones(size: Int) =
    SparseVector.fill(size)(one)

  // Vector with `value` (a by-name argument, re-evaluated per element) at
  // every index.
  override final def fill[A: ClassTag: Zero](size: Int)(value: => A) =
    SparseVector.fill(size)(value)

  // Densifies: copies every logical element (implicit zeros included) into a
  // fresh Array, so this costs O(v.length) time and space regardless of how
  // many entries are actually stored.
  override final def toSeq[A: ClassTag](v: SparseVector[A]) = {
    val values = new Array[A](v.length)
    cfor(0)(_ < values.length, _ + 1) { i =>
      values(i) = v(i)
    }
    values.toSeq
  }

  // Logical length of the vector (not the number of stored entries).
  override final def size(v: SparseVector[_]): Int =
    v.length

  // Element lookup by logical index.
  override final def apply[A](v: SparseVector[A])(index: Int) =
    v(index)

  import SparseVector._

  override final def map[B: ClassTag: Fractional: Zero](v: SparseVector[N])(
      f: N => B) =
    v.map(f)

  override final def reduce[A1 >: N: ClassTag](v: SparseVector[N])(
      r: (A1, A1) => A1) =
    v.reduceLeft(r)

  // NOTE(review): folds over valuesIterator, which visits every logical
  // element (implicit zeros included) — confirm this is intended rather than
  // iterating only the active (stored) values.
  override final def fold[B: ClassTag](v: SparseVector[N])(zero: B)(
      combine: (B, N) => B) =
    v.valuesIterator.foldLeft(zero)(combine)

  // Structural copy of the underlying sparse storage.
  override final def copy(v: SparseVector[N]) =
    v.copy
}
64 |
65 | /**
66 | * Implementation for SparseVector[Double].
67 | */
object DoubleSparseMathVector extends Sparse[Double] {
  // Scalar subtraction: maps the scalar across every logical element.
  override val subS =
    new OpSub.Impl2[SparseVector[Double], Double, SparseVector[Double]] {
      override def apply(v: SparseVector[Double], v2: Double) = v.map {
        _ - v2
      }
    }
  // Element-wise subtraction: delegates to breeze's sparse-sparse operator.
  override val subV = SparseVector.implOps_SVT_SVT_eq_SVT_Double_OpSub

  // Scalar addition.
  override val addS =
    new OpAdd.Impl2[SparseVector[Double], Double, SparseVector[Double]] {
      override def apply(v: SparseVector[Double], v2: Double) = v.map {
        _ + v2
      }
    }
  // Element-wise addition via breeze.
  override val addV = SparseVector.implOps_SVT_SVT_eq_SVT_Double_OpAdd

  // Inner (dot) product of two sparse vectors.
  override val dot =
    new OpMulInner.Impl2[SparseVector[Double], SparseVector[Double], Double] {
      override def apply(v: SparseVector[Double], v2: SparseVector[Double]) =
        v.dot(v2)
    }

  // Scalar division (IEEE-754 semantics; dividing by 0.0 yields Inf/NaN).
  override val divS =
    new OpDiv.Impl2[SparseVector[Double], Double, SparseVector[Double]] {
      override def apply(v: SparseVector[Double], v2: Double) = v.map {
        _ / v2
      }
    }
  // Element-wise division via breeze.
  override val divV = SparseVector.implOps_SVT_SVT_eq_SVT_Double_OpDiv

  // Scalar multiplication.
  override val mulS =
    new OpMulScalar.Impl2[SparseVector[Double], Double, SparseVector[Double]] {
      override def apply(v: SparseVector[Double], v2: Double) = v.map {
        _ * v2
      }
    }
  // Element-wise multiplication via breeze.
  override val mulV = SparseVector.implOpMulScalar_SVT_SVT_eq_SVT_Double
}
107 |
108 | /**
109 | * Implementation for SparseVector[Float].
110 | */
object FloatSparseMathVector extends Sparse[Float] {
  // Scalar subtraction across every logical element.
  override val subS =
    new OpSub.Impl2[SparseVector[Float], Float, SparseVector[Float]] {
      override def apply(v: SparseVector[Float], v2: Float) = v.map { _ - v2 }
    }
  // Element-wise subtraction: delegates to breeze's sparse-sparse operator.
  override val subV = SparseVector.implOps_SVT_SVT_eq_SVT_Float_OpSub

  // Scalar addition.
  override val addS =
    new OpAdd.Impl2[SparseVector[Float], Float, SparseVector[Float]] {
      override def apply(v: SparseVector[Float], v2: Float) = v.map { _ + v2 }
    }
  // Element-wise addition via breeze.
  override val addV = SparseVector.implOps_SVT_SVT_eq_SVT_Float_OpAdd

  // Inner (dot) product of two sparse vectors.
  override val dot =
    new OpMulInner.Impl2[SparseVector[Float], SparseVector[Float], Float] {
      override def apply(v: SparseVector[Float], v2: SparseVector[Float]) =
        v.dot(v2)
    }

  // Scalar division (IEEE-754 semantics; dividing by 0.0f yields Inf/NaN).
  override val divS =
    new OpDiv.Impl2[SparseVector[Float], Float, SparseVector[Float]] {
      override def apply(v: SparseVector[Float], v2: Float) = v.map { _ / v2 }
    }
  // Element-wise division via breeze.
  override val divV = SparseVector.implOps_SVT_SVT_eq_SVT_Float_OpDiv

  // Scalar multiplication.
  override val mulS =
    new OpMulScalar.Impl2[SparseVector[Float], Float, SparseVector[Float]] {
      override def apply(v: SparseVector[Float], v2: Float) = v.map { _ * v2 }
    }
  // Element-wise multiplication via breeze.
  override val mulV = SparseVector.implOpMulScalar_SVT_SVT_eq_SVT_Float
}
142 |
143 | /**
144 | * Implementation for SparseVector[Long].
145 | */
object LongSparseMathVector extends Sparse[Long] {
  // Scalar subtraction across every logical element.
  override val subS =
    new OpSub.Impl2[SparseVector[Long], Long, SparseVector[Long]] {
      override def apply(v: SparseVector[Long], v2: Long) = v.map { _ - v2 }
    }
  // Element-wise subtraction: delegates to breeze's sparse-sparse operator.
  override val subV = SparseVector.implOps_SVT_SVT_eq_SVT_Long_OpSub

  // Scalar addition.
  override val addS =
    new OpAdd.Impl2[SparseVector[Long], Long, SparseVector[Long]] {
      override def apply(v: SparseVector[Long], v2: Long) = v.map { _ + v2 }
    }
  // Element-wise addition via breeze.
  override val addV = SparseVector.implOps_SVT_SVT_eq_SVT_Long_OpAdd

  // Inner (dot) product of two sparse vectors.
  override val dot =
    new OpMulInner.Impl2[SparseVector[Long], SparseVector[Long], Long] {
      override def apply(v: SparseVector[Long], v2: SparseVector[Long]) =
        v.dot(v2)
    }

  // Scalar division — NOTE: truncating integer division; division by 0L
  // throws ArithmeticException.
  override val divS =
    new OpDiv.Impl2[SparseVector[Long], Long, SparseVector[Long]] {
      override def apply(v: SparseVector[Long], v2: Long) = v.map { _ / v2 }
    }
  // Element-wise (integer) division via breeze.
  override val divV = SparseVector.implOps_SVT_SVT_eq_SVT_Long_OpDiv

  // Scalar multiplication.
  override val mulS =
    new OpMulScalar.Impl2[SparseVector[Long], Long, SparseVector[Long]] {
      override def apply(v: SparseVector[Long], v2: Long) = v.map { _ * v2 }
    }
  // Element-wise multiplication via breeze.
  override val mulV = SparseVector.implOpMulScalar_SVT_SVT_eq_SVT_Long
}
177 |
178 | /**
179 | * Implementation for SparseVector[Int].
180 | */
object IntSparseMathVector extends Sparse[Int] {
  // Scalar subtraction across every logical element.
  override val subS =
    new OpSub.Impl2[SparseVector[Int], Int, SparseVector[Int]] {
      override def apply(v: SparseVector[Int], v2: Int) = v.map { _ - v2 }
    }
  // Element-wise subtraction: delegates to breeze's sparse-sparse operator.
  override val subV = SparseVector.implOps_SVT_SVT_eq_SVT_Int_OpSub

  // Scalar addition.
  override val addS =
    new OpAdd.Impl2[SparseVector[Int], Int, SparseVector[Int]] {
      override def apply(v: SparseVector[Int], v2: Int) = v.map { _ + v2 }
    }
  // Element-wise addition via breeze.
  override val addV = SparseVector.implOps_SVT_SVT_eq_SVT_Int_OpAdd

  // Inner (dot) product of two sparse vectors.
  override val dot =
    new OpMulInner.Impl2[SparseVector[Int], SparseVector[Int], Int] {
      override def apply(v: SparseVector[Int], v2: SparseVector[Int]) =
        v.dot(v2)
    }

  // Scalar division — NOTE: truncating integer division; division by 0
  // throws ArithmeticException.
  override val divS =
    new OpDiv.Impl2[SparseVector[Int], Int, SparseVector[Int]] {
      override def apply(v: SparseVector[Int], v2: Int) = v.map { _ / v2 }
    }
  // Element-wise (integer) division via breeze.
  override val divV = SparseVector.implOps_SVT_SVT_eq_SVT_Int_OpDiv

  // Scalar multiplication.
  override val mulS =
    new OpMulScalar.Impl2[SparseVector[Int], Int, SparseVector[Int]] {
      override def apply(v: SparseVector[Int], v2: Int) = v.map { _ * v2 }
    }
  // Element-wise multiplication via breeze.
  override val mulV = SparseVector.implOpMulScalar_SVT_SVT_eq_SVT_Int
}
212 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/Val.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | /** Type class for giving a value to a type `X`. */
/** Type class for giving a value to a type `X`. Contravariant in X so an
  * instance for a supertype works for its subtypes.
  */
trait Val[-X] {

  /** The numeric type of the extracted value. */
  type N
  /** Numeric evidence for N. */
  implicit def n: Numeric[N]

  /** Extracts the numeric value associated with `a`. */
  def valueOf(a: X): N
}
11 |
object Val {

  /** Summons the implicit Val instance for V. */
  def apply[V: Val]: Val[V] = implicitly[Val[V]]

  /** A Val whose extracted value is the arithmetic negation (0 - x) of the
    * underlying instance's value. The numeric type member is preserved in
    * the result type so path-dependent uses still see `N`.
    */
  def inverse[V: Val]: Val[V] { type N = Val[V]#N } = {
    val original = Val[V]
    new Val[V] {
      override type N = original.N
      override lazy val n = original.n
      @inline override def valueOf(a: V) =
        original.n.minus(original.n.zero, original.valueOf(a))
    }
  }

  object Implicits {

    // All implicit defs below carry explicit result types (with the `N`
    // member refined) so implicit resolution is predictable and the
    // path-dependent numeric type stays visible to callers — relying on an
    // inferred type for an implicit def is a well-known Scala pitfall.

    /** A numeric type is trivially its own value. */
    implicit def identityVal[X: Numeric]: Val[X] { type N = X } = {
      val evidence = implicitly[Numeric[X]]
      new Val[X] {
        override type N = X
        override implicit lazy val n = evidence
        @inline override def valueOf(a: X) = a
      }
    }

    /** A pair whose first component is numeric is valued by that component. */
    implicit def tupleValIn1st[Num: Numeric, X]: Val[(Num, X)] { type N = Num } = {
      val evidence = implicitly[Numeric[Num]]
      new Val[(Num, X)] {
        override type N = Num
        override implicit lazy val n = evidence
        @inline override def valueOf(a: (Num, X)) = a._1
      }
    }

    /** A pair whose second component is numeric is valued by that component. */
    implicit def tupleValIn2nd[X, Num: Numeric]: Val[(X, Num)] { type N = Num } = {
      val evidence = implicitly[Numeric[Num]]
      new Val[(X, Num)] {
        override type N = Num
        override implicit lazy val n = evidence
        @inline override def valueOf(a: (X, Num)) = a._2
      }
    }

  }
}
57 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/VectorOps.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import simulacrum.typeclass
4 |
5 | import scala.language.higherKinds
6 | import scala.reflect.ClassTag
7 |
8 | /**
9 | * An abstraction specifying operations one may perform using vectors and
10 | * scalar values. These operations include element-wise & scalar
11 | * multiplication, division, addition, and subtraction. Support for the dot
12 | * product of two vectors is also included. As well as methods to construct new
13 | * vector instances.
14 | */
@typeclass
trait VectorOps[V[_]] {

  /** The element at `index`; behavior for out-of-range indices is left to
    * the implementation (typically an exception). */
  def apply[A](v: V[A])(index: Int): A

  /** Materializes all of the vector's elements as a Seq. */
  def toSeq[A: ClassTag](v: V[A]): Seq[A]

  /** The number of elements in the vector. */
  def size(v: V[_]): Int

  /** Applies `f` to each element, for its side effects only. */
  def foreach[A](v: V[A])(f: A => Any): Unit

}
27 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/math/package.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook
2 |
package object math {

  /** Fractional evidence for Int.
    *
    * NOTE: `div` is truncating integer division, so the Fractional laws hold
    * only approximately; this instance exists so integer-valued vectors can
    * satisfy the `Fractional` context bounds used throughout this package.
    */
  implicit val intIsFractional: Fractional[Int] = new Fractional[Int] {
    @inline override def div(x: Int, y: Int) = x / y
    @inline override def toDouble(x: Int) = x.toDouble
    @inline override def plus(x: Int, y: Int) = x + y
    @inline override def toFloat(x: Int) = x.toFloat
    @inline override def toInt(x: Int) = x
    @inline override def negate(x: Int) = -x
    @inline override def fromInt(x: Int) = x
    @inline override def toLong(x: Int) = x.toLong
    @inline override def times(x: Int, y: Int) = x * y
    @inline override def minus(x: Int, y: Int) = x - y
    // Subtraction-based comparison overflows for operands of opposite sign
    // (e.g. the old `x - y` reported Int.MinValue > 1), so delegate to the
    // overflow-safe library comparison.
    @inline override def compare(x: Int, y: Int) =
      java.lang.Integer.compare(x, y)
  }

  /** Fractional evidence for Long; same truncating-division caveat as Int. */
  implicit val longIsFractional: Fractional[Long] = new Fractional[Long] {
    @inline override def div(x: Long, y: Long) = x / y
    @inline override def toDouble(x: Long) = x.toDouble
    @inline override def plus(x: Long, y: Long) = x + y
    @inline override def toFloat(x: Long) = x.toFloat
    @inline override def toInt(x: Long) = x.toInt
    @inline override def negate(x: Long) = -x
    @inline override def fromInt(x: Int) = x.toLong
    @inline override def toLong(x: Long) = x
    @inline override def times(x: Long, y: Long) = x * y
    @inline override def minus(x: Long, y: Long) = x - y
    // The old `(x - y).toInt` both overflowed and truncated (it could even
    // report equal/inverted order for hugely different values); use the
    // overflow-safe library comparison.
    @inline override def compare(x: Long, y: Long) =
      java.lang.Long.compare(x, y)
  }

}
34 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/ClassificationModule.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import fif.Data
4 |
5 | import scala.annotation.tailrec
6 | import scala.language.{higherKinds, postfixOps, reflectiveCalls}
7 | import scala.reflect.ClassTag
8 |
/** Module abstraction for supervised classifiers over vectorized items. */
trait ClassificationModule extends ItemNumVecModule {

  /** The label type predicted by classifiers of this module. */
  type Label
  /** Sentinel label representing "no label known" (e.g. for query items). */
  val emptyLabel: Label

  /** A trained classifier: maps an item to its predicted label. */
  type Classifier = Item => Label

  // Structural type (hence the `reflectiveCalls` language import): anything
  // exposing a vectorizing function plus the dimensionality of the vectors
  // it produces.
  type Vectorizer = {
    val vectorize: Item => V[N]
    val nDimensions: Int
  }

  /** Algorithm-specific training configuration. */
  type Conf

  import Data.ops._

  /** Trains a classifier, first building the vectorizer from the data. */
  final def train[D[_]: Data](
      c: Conf,
      mkVectorizer: D[(Item, Label)] => Vectorizer
  )(data: D[(Item, Label)]): Classifier =
    train(
      c,
      mkVectorizer { data }
    )(data)

  /** Trains a classifier using an already-built vectorizer. */
  def train[D[_]: Data](
      c: Conf,
      toVec: Vectorizer
  )(data: D[(Item, Label)]): Classifier

}
40 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/ClusteringConf.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
/** Configuration for a clustering run.
  *
  * @param nClusters     Number of clusters (centers) to produce.
  * @param tolerance     Convergence threshold on the change in centers
  *                      between iterations (interpreted by the algorithm).
  * @param maxIterations Hard upper bound on update iterations.
  */
case class ClusteringConf(
    nClusters: Int,
    tolerance: Double,
    maxIterations: Int
)
8 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/ClusteringModule.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import fif.Data
4 |
5 | import scala.language.{postfixOps, higherKinds, reflectiveCalls}
6 |
/** Module abstraction for clustering algorithms over vectorized items. */
trait ClusteringModule extends ItemNumVecModule {

  // Structural type (hence the `reflectiveCalls` language import): anything
  // exposing a vectorizing function plus the dimensionality of the vectors
  // it produces.
  type Vectorizer = {
    val vectorize: Item => V[N]
    val nDimensions: Int
  }

  /** Distance between two numeric vectors. */
  type Distance = (V[N], V[N]) => N

  /** A cluster center: an identifying label plus the center's mean vector. */
  case class Center(id: String, mean: V[N])

  /** Clusters the data, first building the vectorizer from the data. */
  final def cluster[D[_]: Data](
      conf: ClusteringConf,
      dist: Distance,
      mkVectorizer: D[Item] => Vectorizer
  )(data: D[Item]): Seq[Center] =
    cluster(conf, dist, mkVectorizer(data))(data)

  /** Clusters the data using an already-built vectorizer. */
  def cluster[D[_]: Data](
      conf: ClusteringConf,
      dist: Distance,
      toVec: Vectorizer
  )(data: D[Item]): Seq[Center]

  import Data.ops._

  /** Vectorizes each item, then assigns it the id of its nearest center. */
  final def assign[D[_]: Data](
      centers: Seq[Center],
      distance: Distance,
      vectorizer: Vectorizer
  )(
      data: D[Item]
  ): D[String] =
    assign(centers, distance)(
      data map { vectorizer.vectorize }
    )

  /** Assigns each vector the id of its nearest center under `distance`.
    *
    * Edge cases: with no centers every vector maps to the empty string; with
    * exactly one center every vector maps to that center's id.
    */
  final def assign[D[_]: Data](
      centers: Seq[Center],
      distance: Distance
  )(
      data: D[V[N]]
  ): D[String] =
    if (centers isEmpty)
      data map { _ =>
        ""
      } else if (centers.size == 1) {
      val label = centers.head.id
      data map { _ =>
        label
      }

    } else {

      // Seed the fold with the first center, then keep whichever center is
      // strictly closer than the best seen so far (ties keep the earlier
      // center in `centers` order).
      val lessThan = implicitly[Numeric[N]].lt _
      val restCents = centers.slice(1, centers.size)

      data map { v =>
        val (nearestLabel, _) =
          restCents.foldLeft(centers.head.id, distance(centers.head.mean, v)) {

            case (currChampion @ (minLabel, minDistance), center) =>
              val distToCenter = distance(center.mean, v)
              if (lessThan(distToCenter, minDistance))
                (center.id, distToCenter)
              else
                currChampion
          }

        nearestLabel
      }
    }

}
81 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/CustomHashMap.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import scala.annotation.tailrec
4 |
// Immutable Map keyed by a Hashable's hash of K instead of K's own
// equals/hashCode; `hashedKeys` remembers insertion order for iteration.
// NOTE(review): two distinct keys whose `hash` values collide are treated as
// the same key (lookups, replacement, and removal all go through the Int
// hash) — this assumes Hashable is effectively injective for the keys in
// use; confirm with callers.
final class CustomHashMap[K: Hashable, V](
    private[this] val hashedKey2val: Map[Int, V],
    private[this] val hashedKeys: List[K]
) extends Map[K, V] {

  // Adds a binding; if the key's hash is already present, only the value is
  // replaced and the originally-inserted key keeps its place in iteration
  // order.
  override def +[B1 >: V](kv: (K, B1)): Map[K, B1] = {
    val (key, value) = kv
    val id = implicitly[Hashable[K]].hash(key)

    if (hashedKey2val contains id)
      new CustomHashMap(
        (hashedKey2val - id) + (id -> value),
        hashedKeys
      )
    else
      new CustomHashMap(
        hashedKey2val + (id -> value),
        hashedKeys :+ key
      )
  }

  // Lookup by the key's hash.
  override def get(key: K): Option[V] = {
    val id = implicitly[Hashable[K]].hash(key)
    hashedKey2val.get(id)
  }

  // Iterates bindings in first-insertion order of keys.
  override def iterator: Iterator[(K, V)] =
    hashedKeys.toIterator.map { key =>
      val id = implicitly[Hashable[K]].hash(key)
      (key, hashedKey2val(id))
    }

  // Removes the binding (if any) whose hash matches the key's hash.
  override def -(key: K): Map[K, V] = {
    val id = implicitly[Hashable[K]].hash(key)
    if (hashedKey2val contains id)
      new CustomHashMap(
        hashedKey2val - id,
        remove(id, hashedKeys, Nil)
      )
    else
      this
  }

  // Drops the first key in `remaining` whose hash equals `idOfKeyToRemove`;
  // `before` accumulates the already-scanned prefix, preserving order.
  @tailrec
  private[this] def remove(
      idOfKeyToRemove: Int,
      before: List[K],
      remaining: List[K]
  ): List[K] =
    remaining match {

      case anotherKey :: restOfList =>
        val idOfAnother = implicitly[Hashable[K]].hash(anotherKey)
        if (idOfAnother == idOfKeyToRemove)
          before ++ restOfList
        else
          remove(idOfKeyToRemove, before :+ anotherKey, restOfList)

      case Nil =>
        before
    }

}
68 |
object CustomHashMap {

  /** An empty CustomHashMap for any Hashable key type K and value type V. */
  def empty[K: Hashable, V]: Map[K, V] =
    new CustomHashMap[K, V](Map.empty, List.empty)

}
75 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/Hashable.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import simulacrum._
4 |
/** Type class witnessing that values of T can be hashed to an Int. */
@typeclass
trait Hashable[T] {
  /** Computes an Int hash for `t`. */
  def hash(t: T): Int
}
9 |
/** Stock Hashable instances for common key types. */
object ImplicitHashable {

  /** An Int hashes to itself. */
  implicit val iIsH: Hashable[Int] = new Hashable[Int] {
    @inline override def hash(t: Int) = t
  }

  /** A String hashes via its built-in `hashCode`. */
  implicit val sIsH: Hashable[String] = new Hashable[String] {
    @inline override def hash(t: String) = t.hashCode
  }

  /** A Boolean hashes to 1 (true) or 0 (false). */
  implicit val bIsH: Hashable[Boolean] = new Hashable[Boolean] {
    @inline override def hash(t: Boolean) = if (t) 1 else 0
  }

  /** An Option hashes to its content's hash, or 0 when empty. */
  implicit def optIsH[T: Hashable]: Hashable[Option[T]] =
    new Hashable[Option[T]] {
      import Hashable.ops._

      @inline override def hash(maybeT: Option[T]) = maybeT.fold(0) { _.hash }
    }

}
35 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/ItemNumVecModule.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import breeze.math.Semiring
4 | import breeze.storage.Zero
5 | import mlbigbook.math.MathVectorOps
6 |
7 | import scala.language.{higherKinds, postfixOps, reflectiveCalls}
8 | import scala.reflect.ClassTag
9 |
/** Base module wiring together an item type, a numeric scalar type N, and a
  * vector container V, along with the evidence instances the ML modules
  * built on top of it require.
  */
trait ItemNumVecModule {

  /** The item (data point) type. */
  type Item
  /** The numeric scalar type of vector elements. */
  type N
  /** The vector container type. */
  type V[_]

  // vops serves as a type class for numeric vector operations
  // having an instance of type MathVectorOps[N,V] implies constraints on N and V
  val vops: MathVectorOps.Type[N, V]

  // we can get these type classes for N
  implicit lazy final val nFrac: Fractional[N] = vops.n
  implicit lazy final val nSr: Semiring[N] = vops.s
  implicit lazy final val nZero: Zero[N] = vops.z

  // Class tag support for abstract types
  implicit val ctN: ClassTag[N]
  implicit val ctI: ClassTag[Item]
  // support for the numerical vector type
  implicit val ctVn: ClassTag[V[N]]

}
32 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/Kmeans.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import fif.Data
4 | import mlbigbook.math.{MathVectorOps, RandoMut}
5 |
6 | import scala.annotation.tailrec
7 | import scala.language.{higherKinds, reflectiveCalls}
8 | import scala.reflect.ClassTag
9 |
/** K-means clustering implementation of the ClusteringModule contract. */
trait Kmeans extends ClusteringModule {

  /** Creates a pseudo-random number generator for the type N. */
  val mkRandomNumGen: () => RandoMut[N]

  // Brings in the Data type class operations as methods "accessible" using
  // familiar object dot notation.
  // i.e. `data.map` instead of `implicitly[Data[D]].map(data)`
  import Data.ops._

  /** Runs k-means: vectorizes every item once up front, then iteratively
    * refines randomly-initialized centers until the centers stop moving
    * (total movement below `conf.tolerance`) or `conf.maxIterations` is hit.
    */
  override final def cluster[D[_]: Data](
      conf: ClusteringConf,
      dist: Distance,
      toVec: Vectorizer
  )(data: D[Item]): Seq[Center] =
    cluster_h(
      conf,
      dist,
      toVec,
      0,
      data map { toVec.vectorize },
      initialize(conf.nClusters, toVec.nDimensions)
    )

  /** Produces `nClusters` centers with ids "0".."nClusters-1"; each mean is
    * a vector of values drawn from a freshly created `mkRandomNumGen`
    * generator (each `ones` element scaled by a random draw).
    */
  final def initialize(
      nClusters: Int,
      nDimensions: Int
  ): Seq[Center] = {
    val r = mkRandomNumGen()
    // Range#map already yields a Seq; the old trailing `.toSeq` was redundant.
    (0 until nClusters).map { id =>
      Center(
        id = id.toString,
        mean = vops.map(vops.ones(nDimensions)) { one =>
          vops.n.times(one, r.next())
        }
      )
    }
  }

  /** Tail-recursive refinement loop: stops on iteration cap or convergence. */
  @tailrec
  private[this] final def cluster_h[D[_]: Data](
      conf: ClusteringConf,
      dist: Distance,
      toVec: Vectorizer,
      currIter: Int,
      data: D[V[N]],
      currCenters: Seq[Center]
  ): Seq[Center] =
    if (currIter >= conf.maxIterations)
      currCenters
    else {

      val updatedCenters = updateCenters(dist, toVec, currCenters, data)

      // `updateCenters` builds its result from a groupBy, so its ordering is
      // arbitrary: the old positional `currCenters.zip(updatedCenters)` could
      // compare unrelated centers. Match centers by id instead. A center that
      // lost all of its points has no updated counterpart and contributes
      // nothing to the total movement.
      val updatedById = updatedCenters.map(c => (c.id, c)).toMap

      // Sum of absolute distances each surviving center moved this iteration.
      // (The old name claimed "squared", which the computation never did.)
      val totalAbsChangeInMeans =
        currCenters.foldLeft(0.0) {
          case (accum, existing) =>
            updatedById.get(existing.id) match {
              case Some(updated) =>
                accum + math.abs(
                  implicitly[Numeric[N]].toDouble(
                    dist(existing.mean, updated.mean)
                  )
                )
              case None =>
                accum
            }
        }

      if (totalAbsChangeInMeans < conf.tolerance)
        updatedCenters
      else
        cluster_h(
          conf,
          dist,
          toVec,
          currIter + 1,
          data,
          updatedCenters
        )
    }

  /** One k-means update step: assigns every vector to its nearest center and
    * replaces each center's mean with the average of its assigned vectors.
    *
    * NOTE: a center that receives no assignments is dropped from the result
    * (groupBy only produces non-empty groups), so the number of centers can
    * shrink between iterations.
    */
  def updateCenters[D[_]: Data](
      dist: Distance,
      toVec: Vectorizer,
      centers: Seq[Center],
      data: D[V[N]]
  ): Seq[Center] =
    data
      .zip(assign(centers, dist)(data))
      .groupBy { case (_, assignment) => assignment }
      .map {
        case (label, bothDataAndLabel) =>
          // Sum all vectors assigned to this center...
          val summed =
            bothDataAndLabel.foldLeft(vops.zeros(toVec.nDimensions)) {
              case (summing, (vector, _)) =>
                vops.addV(summing, vector)
            }

          // ...then divide by their count to get the new mean.
          val newMean =
            vops.divS(
              summed,
              implicitly[Numeric[N]].fromInt(bothDataAndLabel.size)
            )

          Center(
            id = label,
            mean = newMean
          )
      }
      .toSeq

}
127 |
object Kmeans {

  // Refinement alias exposing the module's abstract types.
  // NOTE(review): `type V[_] = Vec[_]` discards the type parameter (it is an
  // existential alias, not `type V[A] = Vec[A]`), which is why the factory
  // below needs `asInstanceOf` casts on vops and the ClassTag; confirm
  // before tightening the alias.
  type Type[ItemToCluster, Num, Vec[_]] = Kmeans {
    type Item = ItemToCluster
    type N = Num
    type V[_] = Vec[_]
  }

  /** Builds a concrete Kmeans module from vector operations and a
    * random-number-generator factory, with ClassTag evidence for the
    * module's abstract types.
    */
  def apply[ItemToCluster, Num, Vec[_]](
      mathVectorOps: MathVectorOps.Type[Num, Vec],
      mkRando: () => RandoMut[Num]
  )(
      implicit ctForI: ClassTag[ItemToCluster],
      ctForN: ClassTag[Num],
      ctForVn: ClassTag[Vec[Num]]
  ): Type[ItemToCluster, Num, Vec] = {

    // val okVops: MathVectorOps.Type[Type[ItemToCluster, Num, Vec]#N, Type[ItemToCluster, Num, Vec]#V] =
    //   mathVectorOps
    //   mathVectorOps.asInstanceOf[MathVectorOps.Type[Type[ItemToCluster, Num, Vec]#N, Type[ItemToCluster, Num, Vec]#V]]

    // val okCtVn: ClassTag[Type[ItemToCluster, Num, Vec]#V[Type[ItemToCluster, Num, Vec]#N]] =
    //   ctForVn
    //   ctForVn.asInstanceOf[ClassTag[Type[ItemToCluster, Num, Vec]#V[Type[ItemToCluster, Num, Vec]#N]]]

    new Kmeans {

      override type Item = ItemToCluster
      override type N = Num
      override type V[_] = Vec[_]

      override lazy val mkRandomNumGen = mkRando
      // Cast is required because of the existential V alias (see NOTE above).
      override lazy val vops =
        mathVectorOps.asInstanceOf[MathVectorOps.Type[N, V]]

      override implicit lazy val ctI = ctForI
      override implicit lazy val ctN = ctForN
      override implicit lazy val ctVn = ctForVn.asInstanceOf[ClassTag[V[N]]]
    }
  }

}
170 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/KnnClassifier.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import fif.Data
4 | import mlbigbook.math.{MathVectorOps, Val, Argmax}
5 |
6 | import scala.language.{higherKinds, postfixOps, reflectiveCalls}
7 | import scala.reflect.ClassTag
8 |
/** k-Nearest-Neighbors classifier built on the NearestNeighbors ranker. */
trait KnnClassifier extends ClassificationModule {

  /** Number of nearest neighbors consulted per classification. */
  final type NeighborhoodSize = Int

  /** Distance between two numeric vectors. */
  type Distance = (V[N], V[N]) => N
  override final type Conf = (NeighborhoodSize, Distance)

  /** Hashing evidence for labels, required by the vote-counting map below. */
  implicit val labelHash: Hashable[Label]

  // Ranking module over (item, label) pairs; the vectorizer adapter below
  // ignores the label, so distance is computed on the item's vector only.
  private[this] lazy val nnRankMod =
    NearestNeighbors[(Item, Label), N, V](vops)

  /** Trains k-NN: keeps the labeled data (via the ranker) and, at
    * classification time, takes a majority vote over the `nSize` nearest
    * neighbors of the query item. Returns `emptyLabel` when there are no
    * votes at all.
    */
  def train[D[_]: Data](
      c: (NeighborhoodSize, Distance),
      toVec: Vectorizer
  )(data: D[(Item, Label)]): Classifier = {

    val (nSize, dist) = c

    // Adapt the item vectorizer to (item, label) pairs. The cast bridges the
    // module's Distance type to the ranker module's identical-but-distinct
    // path-dependent Distance type.
    val nnRanker = nnRankMod.mkRanker[D](
      dist.asInstanceOf[nnRankMod.Distance],
      new {
        val vectorize: ((Item, Label)) => V[N] = {
          case (item, _) => toVec.vectorize(item)
        }
        val nDimensions = toVec.nDimensions
      }
    )(data)

    itemToClassify =>
      {

        // The query is paired with emptyLabel; the label is never used in
        // the distance computation (see the vectorizer adapter above).
        val neighborhood = nnRanker(nSize)((itemToClassify, emptyLabel))

        // Count votes per label among the neighbors.
        val votesForNeighborhood: Map[Label, Int] =
          neighborhood.foldLeft(CustomHashMap.empty[Label, Int]) {
            case (label2count, (item, label)) =>
              if (label2count contains label)
                (label2count - label) + (label -> (label2count(label) + 1))
              else
                label2count + (label -> 1)
          }

        import fif.ImplicitCollectionsData.seqIsData
        import Val.Implicits.tupleValIn2nd

        // Majority label; falls back to emptyLabel for an empty vote map.
        Argmax(votesForNeighborhood.toSeq).fold { emptyLabel } {
          case (majorityLabel, _) => majorityLabel
        }
      }
  }

}
62 |
object KnnClassifier {

  // Refinement alias exposing the module's abstract types.
  // NOTE(review): `type V[_] = Vec[_]` is an existential alias (parameter
  // discarded), which is why the factory needs `asInstanceOf` casts.
  type Type[Input, L, Num, Vec[_]] = KnnClassifier {
    type Item = Input
    type Label = L
    type N = Num
    type V[_] = Vec[_]
  }

  /** Builds a concrete KnnClassifier module: supplies vector operations, the
    * sentinel "no label" value, ClassTag evidence, and label hashing.
    */
  def apply[Input, L, Num, Vec[_]](
      mathVecOps: MathVectorOps.Type[Num, Vec],
      representsNoLabel: L
  )(
      implicit ctForI: ClassTag[Input],
      ctForN: ClassTag[Num],
      ctForVn: ClassTag[Vec[Num]],
      lh: Hashable[L]
  ): Type[Input, L, Num, Vec] =
    new KnnClassifier {
      override type Item = Input
      override type N = Num
      override type V[_] = Vec[_]
      override type Label = L

      override lazy val emptyLabel = representsNoLabel
      // Cast required because of the existential V alias (see NOTE above).
      override lazy val vops =
        mathVecOps.asInstanceOf[MathVectorOps.Type[N, V]]

      override implicit lazy val labelHash = lh

      override implicit lazy val ctI = ctForI
      override implicit lazy val ctN = ctForN
      override implicit lazy val ctVn = ctForVn.asInstanceOf[ClassTag[V[N]]]
    }

}
99 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/NearestNeighbors.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import fif.Data
4 | import mlbigbook.math.MathVectorOps
5 |
6 | import scala.language.{higherKinds, postfixOps, reflectiveCalls}
7 | import scala.reflect.ClassTag
8 |
/** Exact nearest-neighbors ranking: brute-force distance to every item. */
trait NearestNeighbors extends RankingModule {

  import Data.ops._

  /** The only configuration is the distance function itself. */
  override type Conf = Distance

  /** Builds a ranker that, given a limit and a query item, returns the
    * `limit` items closest to the query under `dist` (ascending distance).
    *
    * The data is vectorized once up front; each query then sorts the entire
    * dataset by distance to the query — O(n log n) work per query. Sorting
    * relies on the module's implicit Numeric ordering for N.
    */
  override def mkRanker[D[_]: Data](
      dist: Distance,
      toVec: Vectorizer
  )(
      data: D[Item]
  ): Ranker = {
    val bothItemVec = data.map { item =>
      (item, toVec.vectorize(item))
    }
    limit => itemToRank =>
      {
        val vecItemToRank = toVec.vectorize(itemToRank)
        bothItemVec.sortBy { case (item, vec) => dist(vec, vecItemToRank) }
          .take(limit)
          .map { case (item, _) => item }
          .toSeq
      }
  }

}
35 |
object NearestNeighbors {

  // Refinement alias exposing the module's abstract types.
  // NOTE(review): `type V[_] = Vec[_]` is an existential alias (parameter
  // discarded), which is why the factory needs `asInstanceOf` casts.
  type Type[ItemToRank, Num, Vec[_]] = NearestNeighbors {
    type Item = ItemToRank
    type N = Num
    type V[_] = Vec[_]
  }

  /** Builds a concrete NearestNeighbors module from vector operations plus
    * ClassTag evidence for the module's abstract types.
    */
  def apply[ItemToRank, Num, Vec[_]](
      mathVecOps: MathVectorOps.Type[Num, Vec]
  )(
      implicit ctForI: ClassTag[ItemToRank],
      ctForN: ClassTag[Num],
      ctForVn: ClassTag[Vec[Num]]
  ): Type[ItemToRank, Num, Vec] =
    new NearestNeighbors {
      override type Item = ItemToRank
      override type N = Num
      override type V[_] = Vec[_]

      // Cast required because of the existential V alias (see NOTE above).
      override lazy val vops =
        mathVecOps.asInstanceOf[MathVectorOps.Type[N, V]]

      override implicit lazy val ctI = ctForI
      override implicit lazy val ctN = ctForN
      override implicit lazy val ctVn = ctForVn.asInstanceOf[ClassTag[V[N]]]
    }

}
65 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/OLD_KnnClassifier.scala:
--------------------------------------------------------------------------------
1 | //package mlbigbook.ml
2 | //
3 | //import mlbigbook.data._
4 | //
5 | //import scala.reflect.ClassTag
6 | //
7 | //object KnnClassifier {
8 | //
9 | // /**
10 | // * Creates a k-Nearest Neighbors classifier.
11 | // *
12 | // * Uses NnRanker.apply underneath to perform the nearest neighbors search.
13 | // */
14 | // def apply[T: ClassTag](n: NearNeighIn)(vdata: VectorDataIn[LabeledData[T]]): Learning[T, Labeled]#Classifier =
15 | // apply(NnRanker(n)(vdata))
16 | //
17 | // def apply[T: ClassTag](nearestNeighborsRanker: Ranker[LabeledData[T]]): Learning[T, Labeled]#Classifier =
18 | // (input: T) => {
19 | // val neighborhood =
20 | // nearestNeighborsRanker(UnlabeledData(input))
21 | // .map(_._1.label)
22 | //
23 | // Labeled(takeLargest(countVotes(neighborhood)))
24 | // }
25 | //
26 | // /**
27 | // * Counts the number of times each element occurs in neighborhood.
28 | // * Returns this information as a mapping.
29 | // */
30 | // def countVotes(neighborhood: Traversable[String]): Map[String, Int] =
31 | // neighborhood.foldLeft(Map.empty[String, Int])(
32 | // (m, label) =>
33 | // if (m.contains(label)) {
34 | // val newCount = m(label) + 1
35 | // (m - label) + (label -> newCount)
36 | // } else {
37 | // m + (label -> 1)
38 | // }
39 | // )
40 | //
41 | // /**
42 | // * Evaluates to the String associated with the largest value (of Numeric type N). If the input
43 | // * elements is empty, evaluates to the empty string ("").
44 | // */
45 | // @inline def takeLargest[N](elements: Map[String, N])(implicit n: Fractional[N]): String =
46 | // takeLargest(elements.toIndexedSeq)
47 | //
48 | // /**
49 | // * Evaluates to the String associated with the largest value (of Numeric type N). If the input
50 | // * elements is empty, evaluates to the empty string ("").
51 | // *
52 | // */
53 | // def takeLargest[N](elements: IndexedSeq[(String, N)])(implicit n: Fractional[N]): String =
54 | // elements.size match {
55 | //
56 | // case 0 =>
57 | // ""
58 | //
59 | // case 1 =>
60 | // elements.head._1
61 | //
62 | // case _ =>
63 | // elements.slice(1, elements.size)
64 | // .foldLeft(elements.head)({
65 | // case ((maxLabel, maxValue), (label, value)) =>
66 | // if (n.gt(value, maxValue))
67 | // (label, value)
68 | // else
69 | // (maxLabel, maxValue)
70 | // })._1
71 | //
72 | // }
73 | //
74 | //}
75 |
76 | // RANKING
77 |
78 | ///**
79 | // * Evaluates to a Traversable containing the elements that have the largest associated values in the input. The
80 | // * returned Traversable has at most limit items.
81 | // */
82 | //def takeTopK[T, N](limit: Int, elements: DataClass[(T, N)])(
83 | // implicit
84 | // n: Fractional[N], c: ClassTag[N]
85 | //): Traversable[(T, N)] =
86 | // elements
87 | // .sortBy(_._2)(c, n.reverse)
88 | // .take(limit)
89 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/ml/RankingModule.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import fif.Data
4 |
5 | import scala.language.{higherKinds, postfixOps, reflectiveCalls}
6 |
/**
 * Module describing ranking: producing the most relevant items for a query
 * item, based on a numeric vector representation and a distance function.
 * Item, N, and V[_] come from the inherited ItemNumVecModule.
 */
trait RankingModule extends ItemNumVecModule {

  /**
   * Anything that can turn an Item into a numeric vector of a fixed,
   * known dimensionality.
   *
   * NOTE(review): this is a structural type, so member access is performed
   * via runtime reflection (hence the `reflectiveCalls` language import);
   * a dedicated trait would avoid that cost — confirm before changing, as
   * callers construct instances with `new { ... }` literals.
   */
  type Vectorizer = {
    val vectorize: Item => V[N]
    val nDimensions: Int
  }

  // Distance between two vectors, evaluating to the module's numeric type N.
  type Distance = (V[N], V[N]) => N

  // A Ranker is curried: first the maximum number of results to return,
  // then the query Item; evaluates to the retrieved items.
  type Ranker = Int => Item => Seq[Item]

  // Implementation-specific configuration; left abstract on purpose.
  type Conf

  /**
   * Convenience overload: builds the Vectorizer from the training data first,
   * then delegates to the abstract mkRanker below.
   */
  final def mkRanker[D[_]: Data](
    c: Conf,
    mkVectorizer: D[Item] => Vectorizer
  )(data: D[Item]): Ranker =
    mkRanker(c, mkVectorizer(data))(data)

  /** Constructs a Ranker over `data` using the supplied vectorizer. */
  def mkRanker[D[_]: Data](
    c: Conf,
    toVec: Vectorizer
  )(
    data: D[Item]
  ): Ranker

}
34 |
--------------------------------------------------------------------------------
/fp4ml-main/src/main/scala/mlbigbook/util/package.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook
2 |
3 | import scala.reflect.ClassTag
4 |
package object util {

  /**
   * Defensively copies `src` into a new Seq[A].
   *
   * The result never aliases the caller's array: mutating `src` after the
   * call does not change the returned Seq. A `null` or empty input yields
   * the empty Seq.
   */
  @inline
  def copyToSeq[@specialized A: ClassTag](src: Array[A]): Seq[A] =
    if (src == null || src.isEmpty)
      Seq.empty[A]
    else
      // clone().toSeq is the idiomatic equivalent of the previous manual
      // `new Array` + System.arraycopy dance: same defensive copy, same
      // element type, less code.
      src.clone().toSeq

}
18 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=WARN, console
3 | log4j.appender.console=org.apache.log4j.ConsoleAppender
4 | log4j.appender.console.target=System.err
5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
7 |
8 | # Settings to quiet third party logs that are too verbose
9 | log4j.logger.org.eclipse.jetty=WARN
10 | log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=WARN
12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=WARN
13 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/AbstractMathVectorOpsT.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import org.scalatest.{FunSpec, Matchers}
4 |
5 | import scala.language.higherKinds
6 |
/**
 * Shared test suite for any MathVectorOps instance: exercises zeros/ones,
 * element-wise add/subtract/multiply (vector and scalar forms), and the dot
 * product. Concrete subclasses fix the numeric type N and vector type V[_]
 * and supply conversions plus a comparison tolerance.
 */
abstract class AbstractMathVectorOpsT[N, V[_]] extends FunSpec with Matchers {

  // The vector-ops instance under test; supplied by the concrete subclass.
  val vops: MathVectorOps.Type[N, V]
  // Numeric evidence for N, needed by scalatest's `+-` tolerance matcher.
  implicit lazy val nIsNumeric: Numeric[N] = vops.n

  // Builds a V[N] from literal values of N.
  def vals2vec(vs: N*): V[N]

  // Conversions into the numeric type under test.
  def dbl2num(d: Double): N
  def int2num(i: Int): N

  // Enables the `0.5.n` literal syntax in test bodies.
  implicit class DblAsN(d: Double) {
    val n: N = dbl2num(d)
  }

  // Enables the `3.n` literal syntax in test bodies.
  implicit class IntAsN(i: Int) {
    val n: N = int2num(i)
  }

  // Maximum allowed absolute difference in value comparisons.
  val tolerance: N

  describe("vector operations") {

    it("zeros") {
      // Every component of a fresh zero vector must be 0.
      vops.foreach { vops.zeros(5) } { x =>
        x should be(0.n +- tolerance)
      }
    }

    it("ones") {
      // Every component of a fresh ones vector must be 1.
      vops.foreach(vops.ones(5)) { x =>
        x should be(1.n +- tolerance)
      }
    }

    it("add vector") {
      val v1 = vals2vec(1.n, 2.n, 40.n)
      val v2 = vals2vec((-1).n, 2.n, 100.n)
      val r = vops.addV(v1, v2)

      vops(r)(0) should be(0.n +- tolerance)
      vops(r)(1) should be(4.n +- tolerance)
      vops(r)(2) should be(140.n +- tolerance)
      vops.size(r) should be(3)
    }

    it("add scalar") {
      val v = vals2vec(1.n, 2.n, 40.n)
      val s = 10.n
      val r = vops.addS(v, s)

      vops(r)(0) should be(11.n +- tolerance)
      vops(r)(1) should be(12.n +- tolerance)
      vops(r)(2) should be(50.n +- tolerance)
      vops.size(r) should be(3)
    }

    it("subtract vector") {
      val v1 = vals2vec(1.n, 2.n, 40.n)
      val v2 = vals2vec((-1).n, 2.n, 100.n)
      val r = vops.subV(v1, v2)

      vops(r)(0) should be(2.n +- tolerance)
      vops(r)(1) should be(0.n +- tolerance)
      vops(r)(2) should be((-60).n +- tolerance)
      vops.size(r) should be(3)
    }

    it("subtract scalar") {
      val v = vals2vec(1.n, 2.n, 40.n)
      val s = 10.n
      val r = vops.subS(v, s)

      vops(r)(0) should be((-9).n +- tolerance)
      vops(r)(1) should be((-8).n +- tolerance)
      vops(r)(2) should be(30.n +- tolerance)
      vops.size(r) should be(3)
    }

    it("dot product") {
      val v1 = vals2vec(1.n, 2.n, 40.n)
      val v2 = vals2vec((-1).n, 2.n, 100.n)
      val r = vops.dot(v1, v2)

      // (1 * -1) + (2 * 2) + (40 * 100) = 4003
      r should be(4003.n +- tolerance)
    }

    it("multiply vector") {
      val v1 = vals2vec(1.n, 2.n, 40.n)
      val v2 = vals2vec((-1).n, 2.n, 100.n)
      val r = vops.mulV(v1, v2)

      vops(r)(0) should be((-1).n +- tolerance)
      vops(r)(1) should be(4.n +- tolerance)
      vops(r)(2) should be(4000.n +- tolerance)
      vops.size(r) should be(3)
    }

    it("multiply scalar") {
      val v = vals2vec(1.n, 2.n, 40.n)
      val s = 10.n
      val r = vops.mulS(v, s)

      vops(r)(0) should be(10.n +- tolerance)
      vops(r)(1) should be(20.n +- tolerance)
      vops(r)(2) should be(400.n +- tolerance)
      vops.size(r) should be(3)
    }
  }
}
116 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/AbstractMvoFractionalT.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import scala.language.higherKinds
4 |
/**
 * Extends the shared vector-ops suite with division tests, applicable only
 * when N is a Fractional type (integral instantiations skip this trait).
 */
trait AbstractMvoFractionalT[N, V[_]] extends AbstractMathVectorOpsT[N, V] {

  it("divide vector") {
    val v1 = vals2vec(1.n, 2.n, 40.n)
    val v2 = vals2vec((-1).n, 2.n, 100.n)
    val r = vops.divV(v1, v2)

    vops(r)(0) should be((-1).n +- tolerance)
    vops(r)(1) should be(1.n +- tolerance)
    // 40 / 100 = 2/5
    vops(r)(2) should be((2.0 / 5.0).n +- tolerance)
    vops.size(r) should be(3)
  }

  it("divide scalar") {
    val v = vals2vec(1.n, 2.n, 40.n)
    val s = 10.n
    val r = vops.divS(v, s)

    vops(r)(0) should be(0.1.n +- tolerance)
    vops(r)(1) should be(0.2.n +- tolerance)
    vops(r)(2) should be(4.n +- tolerance)
    vops.size(r) should be(3)
  }

}
30 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/MathVectorOpsDenseDoubleTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.DenseVector
4 |
/** Instantiates the fractional vector-ops suite for dense Double vectors. */
class MathVectorOpsDenseDoubleTest
    extends AbstractMvoFractionalT[Double, DenseVector] {

  import MathVectorOps.Implicits._
  // Resolve the Double/DenseVector ops instance from implicit scope.
  override val vops = implicitly[MathVectorOps.Type[Double, DenseVector]]
  override def int2num(i: Int) = i.toDouble
  override def dbl2num(d: Double) = d
  override def vals2vec(vs: Double*) = DenseVector(vs: _*)
  // Floating-point comparisons are approximate.
  override val tolerance = 1e-6
}
15 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/MathVectorOpsDenseFloatTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.DenseVector
4 |
/** Instantiates the fractional vector-ops suite for dense Float vectors. */
class MathVectorOpsDenseFloatTest
    extends AbstractMvoFractionalT[Float, DenseVector] {

  import MathVectorOps.Implicits._
  // Resolve the Float/DenseVector ops instance from implicit scope.
  override val vops = implicitly[MathVectorOps.Type[Float, DenseVector]]
  override def int2num(i: Int) = i.toFloat
  override def dbl2num(d: Double) = d.toFloat
  override def vals2vec(vs: Float*) = DenseVector(vs: _*)
  // Floating-point comparisons are approximate.
  override val tolerance = 1e-6f
}
15 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/MathVectorOpsDenseIntTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.DenseVector
4 |
/** Instantiates the integral vector-ops suite (no division) for dense Int vectors. */
class MathVectorOpsDenseIntTest
    extends AbstractMathVectorOpsT[Int, DenseVector] {

  import MathVectorOps.Implicits._
  // Resolve the Int/DenseVector ops instance from implicit scope.
  override val vops = implicitly[MathVectorOps.Type[Int, DenseVector]]
  override def int2num(i: Int) = i
  override def dbl2num(d: Double) = d.toInt
  override def vals2vec(vs: Int*) = DenseVector(vs: _*)
  // Integral type: allow at most a whole-unit difference.
  override val tolerance = 1
}
15 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/MathVectorOpsDenseLongTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.DenseVector
4 |
/** Instantiates the integral vector-ops suite (no division) for dense Long vectors. */
class MathVectorOpsDenseLongTest
    extends AbstractMathVectorOpsT[Long, DenseVector] {

  import MathVectorOps.Implicits._
  // Resolve the Long/DenseVector ops instance from implicit scope.
  override val vops = implicitly[MathVectorOps.Type[Long, DenseVector]]
  override def int2num(i: Int) = i.toLong
  override def dbl2num(d: Double) = d.toLong
  override def vals2vec(vs: Long*) = DenseVector(vs: _*)
  // Integral type: allow at most a whole-unit difference.
  // 1L replaces the deprecated lowercase `1l` suffix, which is easily
  // misread as the number 11.
  override val tolerance = 1L
}
15 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/MathVectorOpsSparseDoubleTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.SparseVector
4 |
/** Instantiates the fractional vector-ops suite for sparse Double vectors. */
class MathVectorOpsSparseDoubleTest
    extends AbstractMvoFractionalT[Double, SparseVector] {

  import MathVectorOps.Implicits._
  // Resolve the Double/SparseVector ops instance from implicit scope.
  override val vops = implicitly[MathVectorOps.Type[Double, SparseVector]]
  override def int2num(i: Int) = i.toDouble
  // `d` is already a Double; the previous `d.toDouble` was a redundant
  // identity conversion (now consistent with MathVectorOpsDenseDoubleTest).
  override def dbl2num(d: Double) = d
  override def vals2vec(vs: Double*) = SparseVector(vs: _*)
  // Floating-point comparisons are approximate.
  override val tolerance = 1e-6
}
15 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/MathVectorOpsSparseFloatTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.SparseVector
4 |
/** Instantiates the fractional vector-ops suite for sparse Float vectors. */
class MathVectorOpsSparseFloatTest
    extends AbstractMvoFractionalT[Float, SparseVector] {

  import MathVectorOps.Implicits._
  // Resolve the Float/SparseVector ops instance from implicit scope.
  override val vops = implicitly[MathVectorOps.Type[Float, SparseVector]]
  override def int2num(i: Int) = i.toFloat
  override def dbl2num(d: Double) = d.toFloat
  override def vals2vec(vs: Float*) = SparseVector(vs: _*)
  // Floating-point comparisons are approximate.
  override val tolerance = 1e-6f
}
15 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/MathVectorOpsSparseIntTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.SparseVector
4 |
/** Instantiates the integral vector-ops suite (no division) for sparse Int vectors. */
class MathVectorOpsSparseIntTest
    extends AbstractMathVectorOpsT[Int, SparseVector] {

  import MathVectorOps.Implicits._
  // Resolve the Int/SparseVector ops instance from implicit scope.
  override val vops = implicitly[MathVectorOps.Type[Int, SparseVector]]
  override def int2num(i: Int) = i
  override def dbl2num(d: Double) = d.toInt
  override def vals2vec(vs: Int*) = SparseVector(vs: _*)
  // Integral type: allow at most a whole-unit difference.
  override val tolerance = 1
}
15 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/math/MathVectorOpsSparseLongTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.math
2 |
3 | import breeze.linalg.SparseVector
4 |
/** Instantiates the integral vector-ops suite (no division) for sparse Long vectors. */
class MathVectorOpsSparseLongTest
    extends AbstractMathVectorOpsT[Long, SparseVector] {

  import MathVectorOps.Implicits._
  // Resolve the Long/SparseVector ops instance from implicit scope.
  override val vops = implicitly[MathVectorOps.Type[Long, SparseVector]]
  override def int2num(i: Int) = i.toLong
  override def dbl2num(d: Double) = d.toLong
  override def vals2vec(vs: Long*) = SparseVector(vs: _*)
  // Integral type: allow at most a whole-unit difference.
  // 1L replaces the deprecated lowercase `1l` suffix, which is easily
  // misread as the number 11.
  override val tolerance = 1L
}
15 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/ml/AddressData.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
/** Test fixtures: minimal address/location records for ML test cases. */
object AddressData {

  // A 2-D coordinate over any fractional numeric type N.
  // NOTE(review): @specialized(Int, Double) combined with the `Fractional`
  // context bound is odd — Fractional[Int] has no standard instance, so the
  // Int specialization looks unreachable; confirm intent before relying on it.
  case class Location[@specialized(Int, Double) N: Fractional](x: N, y: N)

  // A postal address; only the location is mandatory, every descriptive
  // field is optional.
  case class Address[@specialized(Int, Double) N: Fractional](
    loc: Location[N],
    name: Option[String] = None,
    number: Option[Int] = None,
    street: Option[String] = None,
    zip: Option[Short] = None,
    city: Option[String] = None,
    state: Option[String] = None
  )

}
18 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/ml/KmeansTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import breeze.linalg.DenseVector
4 | import mlbigbook.math.{MathVectorOps, NumericConversion, RandoMut}
5 | import org.scalatest.FunSuite
6 |
7 | import scala.language.reflectiveCalls
8 |
/**
 * Smoke test for Kmeans: checks that initialization and clustering run
 * end-to-end on a tiny bag-of-words corpus without throwing.
 * NOTE(review): no assertions are made on the resulting centers — this only
 * verifies the pipeline executes.
 */
class KmeansTest extends FunSuite {

  import KmeansTest._
  import fif.ImplicitCollectionsData._

  test("Simple run") {

    // Random initial cluster centers, sized by conf and the vectorizer.
    val initial =
      kmeans.initialize(conf.nClusters, stringVectorizer.nDimensions)
    println(
      s"""INITIAL with nClusters= ${conf.nClusters} & nDimensions= ${stringVectorizer.nDimensions}
         |# of clusters FROM INITIAL: ${initial.size}
         |
         |${initial.mkString("\n")}
         |
      """.stripMargin
    )

    // Run the full clustering loop and print the converged centers.
    val centers = kmeans.cluster(conf, distance, stringVectorizer)(data)

    centers foreach println
  }

}
33 |
/** Fixtures for KmeansTest: configuration, corpus, vectorizer, distance. */
object KmeansTest {

  // 2 clusters, stop when movement < 0.001 or after 25 iterations.
  val conf = ClusteringConf(
    nClusters = 2,
    tolerance = 0.001,
    maxIterations = 25
  )

  // Kmeans instance over String items with dense Float vectors; a fresh
  // random seed is drawn per call.
  val kmeans: Kmeans.Type[String, Float, DenseVector] = {
    import NumericConversion.Implicits._
    Kmeans[String, Float, DenseVector](
      MathVectorOps.Implicits.FloatDenseVot,
      RandoMut.newSeedPerCall[Float]
    )
  }

  // Tiny whitespace-tokenized corpus.
  val data = Seq(
    "hello world",
    "hello hello",
    "how world",
    "hello how world world hello"
  )

  // Fixed vocabulary (superset of the corpus tokens).
  val words = "hello world how are you doing today fine great".split(" ").toSeq

  val word2index = words.zipWithIndex.toMap

  // All-zero bag-of-words accumulator: one (index -> 0.0f) entry per word.
  val initial = word2index.map { case (_, index) => (index, 0.0f) }

  // Anonymous structural-type instance (`new {}`): must match the reflective
  // kmeans.Vectorizer structural type (hence `reflectiveCalls`).
  val stringVectorizer: kmeans.Vectorizer = new {

    lazy val vectorize = (s: String) =>
      DenseVector {
        // Count token occurrences into the (index -> count) map.
        val bothIndexValue = s.split(" ").foldLeft(initial) {
          case (accum, word) =>
            val index = word2index(word)
            (accum - index) + (index -> (accum(index) + 1.0f))
        }

        // Densify: slot i holds the count of the word with index i.
        (0 until nDimensions).map { index =>
          bothIndexValue.getOrElse(index, 0.0f)
        }.toArray
      }

    lazy val nDimensions = words.size
  }

  // Squared Euclidean distance: (v1 - v2) . (v1 - v2).
  val distance: kmeans.Distance = (v1, v2) => {
    val r = kmeans.vops.subV(v1, v2)
    kmeans.vops.dot(r, r)
  }

}
87 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/ml/KnnClassifierTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import breeze.linalg.DenseVector
4 | import mlbigbook.math.MathVectorOps
5 | import org.scalatest.FunSuite
6 |
7 | import scala.language.reflectiveCalls
8 |
/**
 * Sanity test for KnnClassifier: with k = 1, classifying a training item
 * must retrieve the item itself as its own nearest neighbor, so every
 * prediction on the training set must equal the true label.
 */
class KnnClassifierTest extends FunSuite {

  import KnnClassifierTest._
  import fif.ImplicitCollectionsData._

  test("Sanity Check: 1-NN Classification on train set is perfect") {

    // Train with k = 1 and the squared-Euclidean distance.
    val classify = knn.train((1, distance), stringVectorizer)(data)

    data foreach {
      case (item, label) =>
        val predicted = classify(item)
        assert(predicted === label)
    }
  }

}
26 |
/** Fixtures for KnnClassifierTest: classifier, labeled corpus, vectorizer, distance. */
object KnnClassifierTest {

  import ImplicitHashable._

  // k-NN over String items, Boolean labels, dense Float vectors;
  // `false` doubles as the "no label" sentinel.
  val knn = KnnClassifier[String, Boolean, Float, DenseVector](
    MathVectorOps.Implicits.FloatDenseVot,
    representsNoLabel = false
  )

  // Tiny hand-labeled corpus (label true vs. false).
  val data = Seq(
    ("becky wow", true),
    ("oh my lord", true),
    ("where is that", true),
    ("how now", false),
    ("how now brown cow", false),
    ("how how how how do you do it", false),
    ("how", false)
  )

  // Vocabulary: every whitespace-separated token in the corpus.
  val words = data.flatMap { case (ws, _) => ws.split(" ") }.toSet

  val word2index = words.zipWithIndex.toMap

  // All-zero bag-of-words accumulator: one (index -> 0.0f) entry per word.
  val initial = word2index.map { case (_, index) => (index, 0.0f) }

  // Anonymous structural-type instance (`new {}`): must match the reflective
  // knn.Vectorizer structural type (hence `reflectiveCalls`).
  val stringVectorizer: knn.Vectorizer = new {

    lazy val vectorize = (s: String) =>
      DenseVector {
        // Count token occurrences into the (index -> count) map.
        val bothIndexValue = s.split(" ").foldLeft(initial) {
          case (accum, word) =>
            val index = word2index(word)
            (accum - index) + (index -> (accum(index) + 1.0f))
        }

        // Densify: slot i holds the count of the word with index i.
        (0 until nDimensions).map { index =>
          bothIndexValue.getOrElse(index, 0.0f)
        }.toArray
      }

    lazy val nDimensions = words.size
  }

  // Squared Euclidean distance: (v1 - v2) . (v1 - v2).
  val distance: knn.Distance = (v1, v2) => {
    val r = knn.vops.subV(v1, v2)
    knn.vops.dot(r, r)
  }

}
76 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/ml/KnnLshClassifierTest.scala:
--------------------------------------------------------------------------------
1 | //package mlbigbook.ml
2 | //
3 | //import mlbigbook.data.Labeled
4 | //import mlbigbook.wordcount.LocalSparkContext
5 | //import org.scalatest.FunSuite
6 | //
7 | //class KnnLshClassifierTest extends FunSuite with LocalSparkContext {
8 | //
9 | // import KnnLshClassifierTest._
10 | //
11 | // ignore("classify simple addresses") {
12 | // fail("unimplemented")
13 | // }
14 | //
15 | //}
16 | //
17 | //object KnnLshClassifierTest {
18 | //
19 | // import NearestNeighborsLSHTest._
20 | //
21 | // def classificationTest[T](c: Learning[T, Labeled]#Classifier, input: T, expected: Labeled): Err = {
22 | // val actual = c(input)
23 | // if (actual.label != expected.label)
//      Some(s"Expected and actual labels don't match. Expecting: ${expected.label} . Actual: ${actual.label}")
25 | // else
26 | // None
27 | // }
28 | //
29 | //}
30 | //
31 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/ml/NearestNeighborsLSHTest.scala:
--------------------------------------------------------------------------------
1 | //package mlbigbook.ml
2 | //
3 | //import mlbigbook.data.mut.DenseVector
4 | //import mlbigbook.data.{ OLD_VectorizerMaker, OLD_Vectorizer, OLD_VectorizerMaker$, OLD_Vectorizer$ }
5 | //import mlbigbook.wordcount.LocalSparkContext
6 | //import org.scalatest.FunSuite
7 | //
8 | //class NearestNeighborsLSHTest extends FunSuite with LocalSparkContext {
9 | //
10 | // import NearestNeighborsLSHTest._
11 | //
12 | // ignore("nearest neighbors addresses, k=3") {
//    fail("unimplemented")
14 | // }
15 | //
16 | // ignore("LSH modified NN addresses, k=3, nBins=5") {
17 | // fail("unimplemented")
18 | // }
19 | //
20 | //}
21 | //
22 | //object NearestNeighborsLSHTest {
23 | //
24 | // type Err = Option[String]
25 | //
26 | // def nnTest[T](nn: Ranker[T], input: T, expected: Traversable[(T, Double)]): Err = {
27 | //
28 | // val actual = nn(input)
29 | //
30 | // val errors =
31 | // expected.toSeq.zip(actual.toIndexedSeq).foldLeft(List.empty[String])({
32 | // case (sum, (e, a)) =>
33 | // if (e != a)
34 | // sum :+ s"""Expecting: $e | Actual: $a"""
35 | // else
36 | // sum
37 | // })
38 | //
39 | // if (errors.nonEmpty)
40 | // Some(s"""Found ${errors.length} differences: ${errors.mkString("\n")}""")
41 | // else
42 | // None
43 | // }
44 | //
45 | // lazy val nnConfig = NearNeighIn(Manhattan, 3)
46 | //
47 | // lazy val nLshFuncs = 5
48 | //
49 | // def lshConfig: LshIn = ???
50 | // //LshIn(???, nLshFuncs)
51 | //
52 | // import AddressData._
53 | //
54 | // val apartments = Seq(
55 | // Address(Location(1, 2), Some("apartment A")),
56 | // Address(Location(0, 0), Some("apartment B")),
57 | // Address(Location(3, 2), Some("apartment C")),
58 | // Address(Location(5, 2), Some("apartment D")),
59 | // Address(Location(4, 5), Some("apartment E")),
60 | // Address(Location(0, 5), Some("apartment F")),
61 | // Address(Location(4, 4), Some("apartment G")),
62 | // Address(Location(3, 2), Some("apartment H")),
63 | // Address(Location(2, 1), Some("apartment I")),
64 | // Address(Location(5, 3), Some("apartment J"))
65 | // )
66 | //
67 | // import OLD_Vectorizer._
68 | //
69 | // def addressVectorizer[N: Fractional]: OLD_Vectorizer[Address[N]] =
70 | // (a: Address[N]) =>
71 | // DenseVector(Array(implicitly[Numeric[N]].toDouble(a.loc.x), implicitly[Numeric[N]].toDouble(a.loc.y)))
72 | //
73 | // def mkAddressVectorizer[N](implicit n: Fractional[N]): OLD_VectorizerMaker[Address[N]] =
74 | // ???
75 | //
76 | //}
77 | //
78 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/ml/NearestNeighborsTest.scala:
--------------------------------------------------------------------------------
1 | package mlbigbook.ml
2 |
3 | import breeze.linalg.DenseVector
4 | import mlbigbook.math.MathVectorOps
5 | import org.scalatest.FunSuite
6 |
7 | import scala.language.reflectiveCalls
8 |
/**
 * Sanity test for NearestNeighbors: querying the training set with k = 1
 * must return exactly the query item itself (distance zero to itself).
 */
class NearestNeighborsTest extends FunSuite {

  import NearestNeighborsTest._
  import fif.ImplicitCollectionsData._

  test("Sanity check: 1-NN on train set evaluates to input item") {
    val rank = nn.mkRanker(distance, stringVectorizer)(data)
    data foreach { item =>
      // rank(k) limits the result to the k nearest items.
      val retrieved = rank(1)(item)
      assert(retrieved.size === 1)
      assert(retrieved.head === item)
    }
  }

}
24 |
/** Fixtures: reuses KnnClassifierTest's corpus (labels dropped), vectorizer, distance. */
object NearestNeighborsTest {
  val nn = NearestNeighbors[String, Float, DenseVector](
    MathVectorOps.Implicits.FloatDenseVot
  )
  // Unlabeled items only.
  val data = KnnClassifierTest.data.map { case (ws, _) => ws }
  val stringVectorizer: nn.Vectorizer = KnnClassifierTest.stringVectorizer
  val distance: nn.Distance = KnnClassifierTest.distance
}
33 |
--------------------------------------------------------------------------------
/fp4ml-main/src/test/scala/mlbigbook/ml/OLDKMeansTest.scala:
--------------------------------------------------------------------------------
1 | //package mlbigbook.ml
2 | //
3 | //import mlbigbook.wordcount.LocalSparkContext
4 | //import org.scalatest.FunSuite
5 | //
6 | //class OLDKMeansTest extends FunSuite with LocalSparkContext {
7 | //
8 | // ignore("classify simple addresses") {
9 | // fail("unimplemented")
10 | // }
11 | //
12 | //}
13 | //
14 | //object OLDKMeansTest {
15 | //
16 | // import NearestNeighborsLSHTest._
17 | //
18 | // def softClusterTest[T](sc: SoftCluster[T], input: T, expected: IndexedSeq[(OLD_Center, Double)]): Err = {
19 | // val actual = sc(input)
20 | //
21 | // val errors =
22 | // expected.zip(actual).foldLeft(List.empty[String])({
23 | // case (sum, (e, a)) =>
24 | // if (e != a)
25 | // sum :+ s"""Expecting: $e | Actual: $a"""
26 | // else
27 | // sum
28 | // })
29 | //
30 | // if (errors.nonEmpty)
31 | // Some(s"""Found ${errors.length} differences: ${errors.mkString("\n")}""")
32 | // else
33 | // None
34 | // }
35 | //
36 | //}
37 | //
38 |
--------------------------------------------------------------------------------
/fp4ml-spark/README.md:
--------------------------------------------------------------------------------
1 | # fp4ml-spark
[![Maven Central](https://maven-badges.herokuapp.com/maven-central/io.malcolmgreaves/fp4ml-spark_2.11/badge.svg?style=plastic)](https://maven-badges.herokuapp.com/maven-central/io.malcolmgreaves/fp4ml-spark_2.11)
3 | Machine learning for functional programmers.
4 | Extensions to `fp4ml-main` to uses the Spark ecosystem.
--------------------------------------------------------------------------------
/fp4ml-spark/build.sbt:
--------------------------------------------------------------------------------
name := "fp4ml-spark"

// Shared versions/dependencies defined in project/SharedBuild.scala.
import SharedBuild._

// Macro-paradise compiler plugin (required for macro annotations).
addCompilerPlugin(scalaMacros)

// Spark-specific dependencies plus the shared test dependencies.
libraryDependencies ++=
  fp4mlSparkDeps ++
  testDeps

// Run in the sbt JVM rather than forking a new process.
fork in run := false

// Extra POM metadata for publishing (license, scm, developers).
pomExtra := pomExtraInfo
--------------------------------------------------------------------------------
/fp4ml-spark/src/main/scala/TODO:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/malcolmgreaves/fp4ml/69958a463ce40e508c2a6103599e1af35c9f2845/fp4ml-spark/src/main/scala/TODO
--------------------------------------------------------------------------------
/project/SharedBuild.scala:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import Keys._
3 |
/** Shared sbt build definitions: dependency versions, dependency groups, and POM metadata. */
object SharedBuild {

  // // // // // // // //
  // //  Versions  // //
  // // // // // // // //

  lazy val breezeV = "0.12"
  lazy val nakV = "1.3"
  lazy val dataTcV = "0.0.0"
  lazy val scalaMacrosV = "2.1.0"
  lazy val avroCgV = "0.3.4"
  lazy val shapelessV = "2.2.5"
  lazy val wispV = "0.0.4"
  lazy val argonautV = "6.1"
  lazy val scalajV = "2.2.1"

  // // // // // // // // // //
  // //  Dependencies  // //
  // // // // // // // // // //

  lazy val scalaMacros =
    "org.scalamacros" % "paradise" % scalaMacrosV cross CrossVersion.full

  lazy val fp4mlMainDeps = Seq(
    "org.scalanlp" %% "breeze" % breezeV,
    "org.scalanlp" %% "breeze-natives" % breezeV,
    "org.scalanlp" %% "nak" % nakV,
    "com.quantifind" %% "wisp" % wispV,
    // [B] necessary?
    "io.argonaut" %% "argonaut" % argonautV,
    "org.scalaj" %% "scalaj-http" % scalajV,
    // [E] necessary?
    "com.chuusai" %% "shapeless" % shapelessV,
    "com.gonitro" %% "avro-codegen-runtime" % avroCgV,
    "io.malcolmgreaves" %% "data-tc-extra" % dataTcV,
    "io.malcolmgreaves" %% "data-tc-scala" % dataTcV
  )

  lazy val fp4mlSparkDeps = Seq(
    "io.malcolmgreaves" %% "data-tc-spark" % dataTcV
  )

  lazy val testDeps = Seq(
    "org.scalatest" %% "scalatest" % "2.2.6" % Test
  )

  // POM metadata for Maven Central publishing. The XML element tags had
  // been stripped (only their text content survived, which does not
  // compile); reconstructed here as a standard sbt `pomExtra` XML literal
  // from the surviving url/license/scm/developer text.
  lazy val pomExtraInfo = {
    <url>https://github.com/malcolmgreaves/fp4ml</url>
    <licenses>
      <license>
        <name>Apache 2.0</name>
        <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
        <distribution>repo</distribution>
      </license>
    </licenses>
    <scm>
      <url>git@github.com:malcolmgreaves/fp4ml.git</url>
      <connection>scm:git@github.com:malcolmgreaves/fp4ml.git</connection>
    </scm>
    <developers>
      <developer>
        <id>malcolmgreaves</id>
        <name>Malcolm Greaves</name>
        <email>greaves.malcolm@gmail.com</email>
        <url>https://malcolmgreaves.io/</url>
      </developer>
    </developers>
  }

}
74 |
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.8
2 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | logLevel := Level.Warn
2 |
3 | addSbtPlugin("com.geirsson" % "sbt-scalafmt" % "0.4.10")
4 |
5 | addSbtPlugin("com.gonitro" % "avro-codegen-compiler" % "0.3.4")
6 |
7 | //addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.0.4")
8 | //addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.0.0")
9 |
--------------------------------------------------------------------------------