├── README.md
├── inner-sparse
│   ├── build.sbt
│   └── InnerSparseMatrixMultiply.scala
└── outer-sparse
    ├── build.sbt
    └── OuterMatrixMultiply.scala

/README.md:
--------------------------------------------------------------------------------
# spark-spmm-compute

A suite of COO-format sparse matrix multiplication jobs for Apache Spark.
--------------------------------------------------------------------------------
/inner-sparse/build.sbt:
--------------------------------------------------------------------------------
version := "1.0"
scalaVersion := "2.11.8"

libraryDependencies += "org.apache.spark" %% "spark-mllib" % "2.2.0"
--------------------------------------------------------------------------------
/outer-sparse/build.sbt:
--------------------------------------------------------------------------------
version := "1.0"
scalaVersion := "2.11.8"

libraryDependencies += "org.apache.spark" %% "spark-mllib" % "2.2.0"
--------------------------------------------------------------------------------
/outer-sparse/OuterMatrixMultiply.scala:
--------------------------------------------------------------------------------
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf

object OuterSparseMatrixMultiply {
  def main(args: Array[String]): Unit = {

    val p = args(0).toInt       // minPartitions
    val input1 = args(1)        // left matrix path
    val input2 = args(2)        // right matrix path
    val m = args(3).toInt       // left matrix row count
    val k = args(4).toInt       // left matrix column count (inner dimension)
    val n = args(5).toInt       // right matrix column count
    val nSplits = args(6).toInt // numSplits for the join

    val conf = new SparkConf().setAppName("outer_" + m + "-" + k + "-" + n)
    val sc = new SparkContext(conf)

    val rdd1 = sc.textFile(input1, p)
    val rdd2 = sc.textFile(input2, p)

    // Parse COO triples "row col value"; key the left matrix by column index
    // and the right matrix by row index, i.e. both by the shared inner dimension.
    val mKv = rdd1.map(_.split(" ")).map(r => (r(1).toInt, (r(0).toInt, r(2).toDouble)))
    val nKv = rdd2.map(_.split(" ")).map(r => (r(0).toInt, (r(1).toInt, r(2).toDouble)))

    // Joining on the inner index pairs up every left entry (i, k) with every
    // right entry (k, j); this is the outer-product formulation of the multiply.
    val mnJo = mKv.join(nKv, nSplits)

    val mult = mnJo.map { case (_, ((i, lVal), (j, rVal))) => ((i, j), lVal * rVal) }

    // Sum the partial products belonging to each output cell (i, j).
    val result = mult.reduceByKey(_ + _).map { case ((i, j), v) => s"$i $j $v" }

    result.saveAsTextFile("/outer_sparse_result")

    sc.stop()
  }
}
--------------------------------------------------------------------------------
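To make that dataflow concrete, here is a minimal, Spark-free smoke test. It is not part of the repo (the object name OuterProductCheck and the toy matrices are invented for illustration), but it replays the same key-by-inner-index, join, multiply, reduce pipeline on plain Scala collections:

// Hypothetical smoke test (not in the repo): checks the outer-product
// formulation on a 2x2 example using plain Scala collections.
object OuterProductCheck {
  def main(args: Array[String]): Unit = {
    // COO triples (row, col, value), 0-based indices as in the jobs above.
    val left  = Seq((0, 0, 1.0), (0, 1, 2.0), (1, 1, 3.0)) // [[1 2], [0 3]]
    val right = Seq((0, 0, 4.0), (1, 0, 5.0), (1, 1, 6.0)) // [[4 0], [5 6]]

    // Group left entries by column, pair them with right entries sharing
    // that index as their row, and emit one partial product per pairing.
    val byCol = left.groupBy(_._2)
    val partials = for {
      (kIdx, ls) <- byCol.toSeq
      (i, _, lv) <- ls
      (_, j, rv) <- right.filter(_._1 == kIdx)
    } yield ((i, j), lv * rv)

    // Sum partial products per output cell, as reduceByKey does on the cluster.
    val product = partials.groupBy(_._1).map { case (ij, ps) => (ij, ps.map(_._2).sum) }

    product.toSeq.sortBy(_._1).foreach { case ((i, j), v) => println(s"$i $j $v") }
    // Prints: 0 0 14.0 / 0 1 12.0 / 1 0 15.0 / 1 1 18.0
  }
}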
/inner-sparse/InnerSparseMatrixMultiply.scala:
--------------------------------------------------------------------------------
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf

import breeze.linalg.SparseVector

object InnerSparseMatrixMultiply {

  def main(args: Array[String]): Unit = {

    val p = args(0).toInt  // minPartitions
    val input1 = args(1)   // left matrix path
    val input2 = args(2)   // right matrix path
    val m = args(3).toInt  // left matrix row count
    val k = args(4).toInt  // left matrix column count (inner dimension)
    val n = args(5).toInt  // right matrix column count

    val conf = new SparkConf().setAppName("inner_" + m + "-" + k + "-" + n)
    val sc = new SparkContext(conf)

    val rdd1 = sc.textFile(input1, p)
    val rdd2 = sc.textFile(input2, p)

    // Parse COO triples "row col value"; key the left matrix by row index
    // and the right matrix by column index.
    val me1 = rdd1.map(_.split(" ")).map(r => (r(0).toInt, (r(1).toInt, r(2).toDouble)))
    val me2 = rdd2.map(_.split(" ")).map(r => (r(1).toInt, (r(0).toInt, r(2).toDouble)))

    // Assemble each left row as a Breeze SparseVector of length k; Breeze
    // requires the index array to be sorted ascending.
    val lRowGrouped = me1.groupByKey()
    val lSparse = lRowGrouped.map(x => (x._1, x._2.toSeq.sortBy(_._1).unzip))
    val lBreezeSparse = lSparse.map(x => (x._1, new SparseVector(x._2._1.toArray, x._2._2.toArray, k)))

    // Assemble each right column the same way; left rows and right columns
    // share the inner dimension k.
    val rColGrouped = me2.groupByKey()
    val rSparse = rColGrouped.map(x => (x._1, x._2.toSeq.sortBy(_._1).unzip))
    val rBreezeSparse = rSparse.map(x => (x._1, new SparseVector(x._2._1.toArray, x._2._2.toArray, k)))

    // Broadcast all right columns to every executor; this assumes the right
    // matrix is small enough to fit in driver and executor memory.
    val bRight = sc.broadcast(rBreezeSparse.collect)

    // Output cell (i, j) is the dot product of left row i and right column j;
    // drop exact zeros to keep the result sparse.
    val result = lBreezeSparse
      .flatMap { case (lIndex, lVector) =>
        bRight.value.map { case (rIndex, rVector) => ((lIndex, rIndex), lVector.dot(rVector)) }
      }
      .filter(_._2 != 0.0)
      .map { case ((i, j), v) => s"$i $j $v" }

    result.saveAsTextFile("/inner_sparse_result")

    sc.stop()
  }
}
--------------------------------------------------------------------------------
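The core per-cell operation above is Breeze's sparse dot product. A minimal sketch of that operation in isolation (hypothetical, not part of the repo; the object name DotCheck and the toy vectors are invented), showing why both row and column vectors are built with length k and sorted index arrays:

// Hypothetical illustration (not in the repo) of the Breeze dot product
// that InnerSparseMatrixMultiply performs for each output cell.
import breeze.linalg.SparseVector

object DotCheck {
  def main(args: Array[String]): Unit = {
    val k = 4 // shared inner dimension
    // Left row with nonzeros at columns 2 and 3; indices sorted ascending.
    val row = new SparseVector(Array(2, 3), Array(1.0, 2.0), k)
    // Right column with nonzeros at rows 1 and 3.
    val col = new SparseVector(Array(1, 3), Array(3.0, 4.0), k)
    // Only index 3 overlaps, so the dot product is 2.0 * 4.0 = 8.0.
    println(row.dot(col))
  }
}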