├── project ├── build.properties └── plugins.sbt ├── images ├── MCL.png ├── Expansion.png ├── Inflation.png └── Difference.png ├── src ├── test │ ├── resources │ │ ├── MCL │ │ │ ├── clusters.tab │ │ │ ├── karateNodes.csv │ │ │ └── karateEdges.csv │ │ └── MCLUtils │ │ │ ├── OrientedNodes.txt │ │ │ ├── OrientedEdges.txt │ │ │ └── OrientedMatrixSelfLoop.txt │ └── scala │ │ ├── org │ │ └── apache │ │ │ └── spark │ │ │ └── mllib │ │ │ └── clustering │ │ │ ├── MCLFunSuite.scala │ │ │ ├── MCLModelSuite.scala │ │ │ ├── MCLUtilsSuite.scala │ │ │ └── MCLSuite.scala │ │ └── MainSuite.scala └── main │ └── scala │ ├── org │ └── apache │ │ └── spark │ │ └── mllib │ │ └── clustering │ │ ├── MCLModel.scala │ │ ├── MCLUtils.scala │ │ └── MCL.scala │ └── Main.scala ├── .gitignore ├── LICENSE.txt ├── .travis.yml └── README.md /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.11 -------------------------------------------------------------------------------- /images/MCL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joandre/MCL_spark/HEAD/images/MCL.png -------------------------------------------------------------------------------- /images/Expansion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joandre/MCL_spark/HEAD/images/Expansion.png -------------------------------------------------------------------------------- /images/Inflation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joandre/MCL_spark/HEAD/images/Inflation.png -------------------------------------------------------------------------------- /images/Difference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joandre/MCL_spark/HEAD/images/Difference.png -------------------------------------------------------------------------------- /src/test/resources/MCL/clusters.tab: -------------------------------------------------------------------------------- 1 | 1 2 3 4 5 6 7 8 11 12 13 14 18 20 22 17 2 | 9 31 10 28 29 33 34 15 16 19 21 23 24 30 27 3 | 32 26 25 4 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.4.0") 2 | 3 | resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/" 4 | 5 | addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.4") -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | # sbt specific 4 | .cache 5 | .lib/ 6 | dist/* 7 | target/ 8 | lib_managed/ 9 | src_managed/ 10 | project/boot/ 11 | project/plugins/project/ 12 | # Scala-IDE specific 13 | .idea/ 14 | .scala_dependencies 15 | .worksheet -------------------------------------------------------------------------------- /src/test/resources/MCL/karateNodes.csv: -------------------------------------------------------------------------------- 1 | 1 "1" 2 | 2 "2" 3 | 3 "3" 4 | 4 "4" 5 | 5 "5" 6 | 6 "6" 7 | 7 "7" 8 | 8 "8" 9 | 9 "9" 10 | 10 "10" 11 | 11 "11" 12 | 12 "12" 13 | 13 "13" 14 | 14 "14" 15 | 15 "15" 16 | 16 "16" 17 | 17 "17" 18 | 18 "18" 19 
| 19 "19" 20 | 20 "20" 21 | 21 "21" 22 | 22 "22" 23 | 23 "23" 24 | 24 "24" 25 | 25 "25" 26 | 26 "26" 27 | 27 "27" 28 | 28 "28" 29 | 29 "29" 30 | 30 "30" 31 | 31 "31" 32 | 32 "32" 33 | 33 "33" 34 | 34 "34" 35 | -------------------------------------------------------------------------------- /src/test/resources/MCLUtils/OrientedNodes.txt: -------------------------------------------------------------------------------- 1 | 1 "0" 2 | 2 "1" 3 | 3 "2" 4 | 4 "3" 5 | 5 "4" 6 | 6 "5" 7 | 7 "6" 8 | 8 "7" 9 | 9 "8" 10 | 10 "9" 11 | 11 "10" 12 | 12 "11" 13 | 13 "12" 14 | 14 "13" 15 | 15 "14" 16 | 16 "15" 17 | 17 "16" 18 | 18 "17" 19 | 19 "18" 20 | 20 "19" 21 | 21 "20" 22 | 22 "21" 23 | 23 "22" 24 | 24 "23" 25 | 25 "24" 26 | 26 "25" 27 | 27 "26" 28 | 28 "27" 29 | 29 "28" 30 | 30 "29" 31 | 31 "30" 32 | 32 "31" 33 | 33 "32" 34 | 34 "33" 35 | 35 "34" 36 | 36 "35" 37 | 37 "36" 38 | 38 "37" 39 | 39 "38" 40 | 40 "39" 41 | 41 "40" 42 | 42 "41" 43 | 43 "42" 44 | 44 "43" 45 | 45 "44" 46 | 46 "45" 47 | 47 "46" 48 | 48 "47" 49 | 49 "48" 50 | 50 "49" 51 | -------------------------------------------------------------------------------- /src/test/resources/MCL/karateEdges.csv: -------------------------------------------------------------------------------- 1 | 1 2 4 2 | 1 3 5 3 | 1 4 3 4 | 1 5 3 5 | 1 6 3 6 | 1 7 3 7 | 1 8 2 8 | 1 9 2 9 | 1 11 2 10 | 1 12 3 11 | 1 13 1 12 | 1 14 3 13 | 1 18 2 14 | 1 20 2 15 | 1 22 2 16 | 1 32 2 17 | 2 3 6 18 | 2 4 3 19 | 2 8 4 20 | 2 14 5 21 | 2 18 1 22 | 2 20 2 23 | 2 22 2 24 | 2 31 2 25 | 3 4 3 26 | 3 8 4 27 | 3 9 5 28 | 3 10 1 29 | 3 14 3 30 | 3 28 2 31 | 3 29 2 32 | 3 33 2 33 | 4 8 3 34 | 4 13 3 35 | 4 14 3 36 | 5 7 2 37 | 5 11 3 38 | 6 7 5 39 | 6 11 3 40 | 6 17 3 41 | 7 17 3 42 | 9 31 3 43 | 9 33 3 44 | 9 34 4 45 | 10 34 2 46 | 14 34 3 47 | 15 33 3 48 | 15 34 2 49 | 16 33 3 50 | 16 34 4 51 | 19 33 1 52 | 19 34 2 53 | 20 34 1 54 | 21 33 3 55 | 21 34 1 56 | 23 33 2 57 | 23 34 3 58 | 24 26 5 59 | 24 28 4 60 | 24 30 3 61 | 24 33 5 62 | 24 34 4 63 | 25 26 2 64 | 25 28 3 65 | 25 32 2 66 | 26 32 7 67 | 27 30 4 68 | 27 34 2 69 | 28 34 4 70 | 29 32 2 71 | 29 34 2 72 | 30 33 4 73 | 30 34 2 74 | 31 33 3 75 | 31 34 3 76 | 32 33 4 77 | 32 34 4 78 | 33 34 5 79 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2017, Joan André 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # 3 | # Copyright (c) 2015-2016, Joan André 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | language: scala 24 | script: 25 | - sbt clean coverage test coverageReport 26 | after_success: 27 | - bash <(curl -s https://codecov.io/bash) 28 | scala: 29 | - 2.10.5 30 | - 2.11.8 31 | jdk: 32 | - oraclejdk8 33 | env: 34 | - SPARKVERSION="2.0.1" 35 | 36 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/mllib/clustering/MCLFunSuite.scala: -------------------------------------------------------------------------------- 1 | /*The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2016, Joan André 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE.*/ 22 | 23 | package org.apache.spark.mllib.clustering 24 | 25 | import com.holdenkarau.spark.testing.{DatasetSuiteBase, SharedSparkContext} 26 | import org.scalatest.{FunSuite, Matchers, Tag} 27 | 28 | /** MCL specific implementation of Scala Test Suite */ 29 | //TODO Why spark ? 
30 | private[spark] abstract class MCLFunSuite extends FunSuite with Matchers with SharedSparkContext with DatasetSuiteBase 31 | 32 | object UnitTest extends Tag("org.apache.spark.mllib.clustering.tags.UnitTest") 33 | object IntegrationTest extends Tag("org.apache.spark.mllib.clustering.tags.IntegrationTest") -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/mllib/clustering/MCLModelSuite.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.mllib.clustering 2 | 3 | import org.apache.log4j.{Level, Logger} 4 | import org.apache.spark.graphx._ 5 | import org.apache.spark.rdd.RDD 6 | import org.apache.spark.sql.Row 7 | import org.apache.spark.util.Utils 8 | 9 | /** 10 | * Created by andrejoan on 4/30/16. 11 | */ 12 | class MCLModelSuite extends MCLFunSuite{ 13 | // Disable Spark messages when running program 14 | Logger.getLogger("org").setLevel(Level.OFF) 15 | Logger.getLogger("akka").setLevel(Level.OFF) 16 | 17 | test("model save/load", UnitTest){ 18 | 19 | val users: RDD[(VertexId, String)] = 20 | sc.parallelize(Array((0L,"Node1"), (1L,"Node2"), 21 | (2L,"Node3"), (3L,"Node4"),(4L,"Node5"), 22 | (5L,"Node6"), (6L,"Node7"), (7L, "Node8"), 23 | (8L, "Node9"), (9L, "Node10"), (10L, "Node11"))) 24 | 25 | val relationships: RDD[Edge[Double]] = 26 | sc.parallelize( 27 | Seq(Edge(0, 1, 1.0), Edge(1, 0, 1.0), 28 | Edge(0, 2, 1.0), Edge(2, 0, 1.0), 29 | Edge(0, 3, 1.0), Edge(3, 0, 1.0), 30 | Edge(1, 2, 1.0), Edge(2, 1, 1.0), 31 | Edge(1, 3, 1.0), Edge(3, 1, 1.0), 32 | Edge(2, 3, 1.0), Edge(3, 2, 1.0), 33 | Edge(4, 5, 1.0), Edge(5, 4, 1.0), 34 | Edge(4, 6, 1.0), Edge(6, 4, 1.0), 35 | Edge(4, 7, 1.0), Edge(7, 4, 1.0), 36 | Edge(5, 6, 1.0), Edge(6, 5, 1.0), 37 | Edge(5, 7, 1.0), Edge(7, 5, 1.0), 38 | Edge(6, 7, 1.0), Edge(7, 6, 1.0), 39 | Edge(3, 8, 1.0), Edge(8, 3, 1.0), 40 | Edge(9, 8, 1.0), Edge(8, 9, 1.0), 41 | Edge(9, 10, 1.0), Edge(10, 9, 1.0), 42 | Edge(4, 10, 1.0), Edge(10, 4, 1.0) 43 | )) 44 | 45 | val graph = Graph(users, relationships) 46 | 47 | val model: MCLModel = MCL.train(graph) 48 | 49 | // Check number of clusters 50 | model.nbClusters shouldEqual 3 51 | 52 | // Check save and load methods 53 | val tempDir = Utils.createTempDir() 54 | val path = tempDir.toURI.toString 55 | 56 | Array(true, false).foreach { case selector => 57 | // Save model, load it back, and compare. 
58 | try { 59 | model.save(sc, path) 60 | val sameModel = MCLModel.load(sc, path) 61 | assertDatasetEquals(model.assignments.orderBy("id"), sameModel.assignments.orderBy("id")) 62 | } finally { 63 | Utils.deleteRecursively(tempDir) 64 | } 65 | } 66 | 67 | } 68 | 69 | test("nodes assignments", UnitTest) { 70 | val nodeId = 1.0.toLong 71 | val cluster = 2.0.toLong 72 | val newAssignment:Assignment = Assignment.apply(Row(nodeId, cluster)) 73 | 74 | newAssignment.id shouldEqual nodeId 75 | newAssignment.cluster shouldEqual cluster 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /src/test/scala/MainSuite.scala: -------------------------------------------------------------------------------- 1 | /*The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2016, Joan André 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE.*/ 22 | 23 | import org.scalatest.{FunSuite, Matchers} 24 | 25 | class MainSuite extends FunSuite with Matchers{ 26 | 27 | test("toInt"){ 28 | val eR = Main.toInt(Symbol("expansionRate"), "2") 29 | eR shouldEqual 2 30 | 31 | an [Exception] should be thrownBy Main.toInt(Symbol("expansionRate"), "1.1") 32 | } 33 | 34 | test("toDouble"){ 35 | val iR = Main.toDouble(Symbol("inflationRate"), "2.0") 36 | iR shouldEqual 2.0 37 | 38 | an [Exception] should be thrownBy Main.toDouble(Symbol("inflationRate"), "test") 39 | } 40 | 41 | test("nextOption"){ 42 | val args: Array[String] = Array("--expansionRate", "3", "--inflationRate", "3.0", "--epsilon", "0.1", "--maxIterations", "20", "--selfLoopWeight", "0.1", "--graphOrientationStrategy", "directed") 43 | val arglist = args.toList 44 | 45 | val options = Main.nextOption(Map(),arglist) 46 | Main.toInt('expansionRate, options.getOrElse('expansionRate, 2).toString) shouldEqual 3 47 | Main.toDouble('inflationRate, options.getOrElse('inflationRate, 2.0).toString) shouldEqual 3.0 48 | Main.toDouble('epsilon, options.getOrElse('epsilon, 0.01).toString) shouldEqual 0.1 49 | Main.toInt('maxIterations, options.getOrElse('maxIterations, 10).toString) shouldEqual 20 50 | Main.toDouble('selfLoopWeight, options.getOrElse('selfLoopWeight, 1.0).toString) shouldEqual 0.1 51 | options.getOrElse('graphOrientationStrategy, "undirected").toString shouldEqual "directed" 52 | 53 | val args2: Array[String] = Array("--wrongOption", "test") 54 | val arglist2 = args2.toList 55 | 56 | an [Exception] should be thrownBy Main.nextOption(Map(),arglist2) 57 | } 58 | 59 | /*test("main"){ 60 | 
val args: Array[String] = Array("--expansionRate", "2", "--inflationRate", "2.0", "--epsilon", "0.01", "--maxIterations", "10", "--selfLoopWeight", "1", "--graphOrientationStrategy", "undirected") 61 | 62 | val streamIM = new java.io.ByteArrayOutputStream() 63 | Console.withOut(streamIM) { 64 | Main.main(args) 65 | } 66 | 67 | streamIM.toString.split("\n") should contain theSameElementsAs Array("0 => List(0, 1, 2, 3)", "4 => List(4, 5, 6, 7)", "9 => List(8, 9, 10)").toSeq 68 | }*/ 69 | } 70 | -------------------------------------------------------------------------------- /src/test/resources/MCLUtils/OrientedEdges.txt: -------------------------------------------------------------------------------- 1 | 1 1 1.0 2 | 1 9 1.0 3 | 1 18 1.0 4 | 1 20 1.0 5 | 1 21 1.0 6 | 1 24 1.0 7 | 1 35 1.0 8 | 1 43 1.0 9 | 1 45 1.0 10 | 1 46 1.0 11 | 2 7 1.0 12 | 2 11 1.0 13 | 2 12 1.0 14 | 2 13 1.0 15 | 2 18 1.0 16 | 2 24 1.0 17 | 2 28 1.0 18 | 2 31 1.0 19 | 2 40 1.0 20 | 3 9 1.0 21 | 3 13 1.0 22 | 3 15 1.0 23 | 3 21 1.0 24 | 3 23 1.0 25 | 3 24 1.0 26 | 3 29 1.0 27 | 3 30 1.0 28 | 3 38 1.0 29 | 3 42 1.0 30 | 3 50 1.0 31 | 4 35 1.0 32 | 4 40 1.0 33 | 4 45 1.0 34 | 4 46 1.0 35 | 4 48 1.0 36 | 4 49 1.0 37 | 5 14 1.0 38 | 5 15 1.0 39 | 5 19 1.0 40 | 5 22 1.0 41 | 5 26 1.0 42 | 5 28 1.0 43 | 5 34 1.0 44 | 5 36 1.0 45 | 5 40 1.0 46 | 6 20 1.0 47 | 6 21 1.0 48 | 6 23 1.0 49 | 6 26 1.0 50 | 6 28 1.0 51 | 6 36 1.0 52 | 6 38 1.0 53 | 6 44 1.0 54 | 7 8 1.0 55 | 7 12 1.0 56 | 7 15 1.0 57 | 7 17 1.0 58 | 7 20 1.0 59 | 7 22 1.0 60 | 7 25 1.0 61 | 7 26 1.0 62 | 7 28 1.0 63 | 7 36 1.0 64 | 7 42 1.0 65 | 8 25 1.0 66 | 8 26 1.0 67 | 8 28 1.0 68 | 8 35 1.0 69 | 8 37 1.0 70 | 8 38 1.0 71 | 8 39 1.0 72 | 9 10 1.0 73 | 9 11 1.0 74 | 9 17 1.0 75 | 9 18 1.0 76 | 9 34 1.0 77 | 9 46 1.0 78 | 9 47 1.0 79 | 10 13 1.0 80 | 10 21 1.0 81 | 10 22 1.0 82 | 10 24 1.0 83 | 10 28 1.0 84 | 10 34 1.0 85 | 10 48 1.0 86 | 10 49 1.0 87 | 10 50 1.0 88 | 11 18 1.0 89 | 11 22 1.0 90 | 11 24 1.0 91 | 11 26 1.0 92 | 11 27 1.0 93 | 11 34 1.0 94 | 11 36 1.0 95 | 11 37 1.0 96 | 11 44 1.0 97 | 12 28 1.0 98 | 12 33 1.0 99 | 12 36 1.0 100 | 12 38 1.0 101 | 12 50 1.0 102 | 13 17 1.0 103 | 13 19 1.0 104 | 13 23 1.0 105 | 13 29 1.0 106 | 13 31 1.0 107 | 13 39 1.0 108 | 13 41 1.0 109 | 13 46 1.0 110 | 13 50 1.0 111 | 14 21 1.0 112 | 14 24 1.0 113 | 14 26 1.0 114 | 14 35 1.0 115 | 14 45 1.0 116 | 15 20 1.0 117 | 15 22 1.0 118 | 15 25 1.0 119 | 15 29 1.0 120 | 15 43 1.0 121 | 15 46 1.0 122 | 15 47 1.0 123 | 16 19 1.0 124 | 16 26 1.0 125 | 16 33 1.0 126 | 16 38 1.0 127 | 16 40 1.0 128 | 16 44 1.0 129 | 16 45 1.0 130 | 16 50 1.0 131 | 17 18 1.0 132 | 17 20 1.0 133 | 17 24 1.0 134 | 17 26 1.0 135 | 17 27 1.0 136 | 17 28 1.0 137 | 17 30 1.0 138 | 17 44 1.0 139 | 17 45 1.0 140 | 18 20 1.0 141 | 18 31 1.0 142 | 18 36 1.0 143 | 18 37 1.0 144 | 18 40 1.0 145 | 19 30 1.0 146 | 19 35 1.0 147 | 19 36 1.0 148 | 19 37 1.0 149 | 19 39 1.0 150 | 19 43 1.0 151 | 19 46 1.0 152 | 20 24 1.0 153 | 20 27 1.0 154 | 20 34 1.0 155 | 20 45 1.0 156 | 20 47 1.0 157 | 21 22 1.0 158 | 21 31 1.0 159 | 21 32 1.0 160 | 22 23 1.0 161 | 22 25 1.0 162 | 22 27 1.0 163 | 22 28 1.0 164 | 22 30 1.0 165 | 22 32 1.0 166 | 22 36 1.0 167 | 22 47 1.0 168 | 23 32 1.0 169 | 23 36 1.0 170 | 23 37 1.0 171 | 23 38 1.0 172 | 23 39 1.0 173 | 23 43 1.0 174 | 23 44 1.0 175 | 23 45 1.0 176 | 24 28 1.0 177 | 24 40 1.0 178 | 24 47 1.0 179 | 25 34 1.0 180 | 26 27 1.0 181 | 26 33 1.0 182 | 26 34 1.0 183 | 26 35 1.0 184 | 26 42 1.0 185 | 26 43 1.0 186 | 27 30 1.0 187 | 27 32 1.0 188 | 27 33 1.0 189 | 27 35 1.0 190 | 27 38 
1.0 191 | 27 41 1.0 192 | 27 44 1.0 193 | 28 32 1.0 194 | 28 42 1.0 195 | 28 43 1.0 196 | 28 46 1.0 197 | 29 31 1.0 198 | 29 32 1.0 199 | 29 37 1.0 200 | 29 40 1.0 201 | 30 31 1.0 202 | 31 36 1.0 203 | 31 48 1.0 204 | 31 49 1.0 205 | 32 37 1.0 206 | 32 40 1.0 207 | 32 46 1.0 208 | 32 48 1.0 209 | 33 35 1.0 210 | 33 44 1.0 211 | 33 49 1.0 212 | 33 50 1.0 213 | 34 38 1.0 214 | 34 42 1.0 215 | 34 44 1.0 216 | 34 47 1.0 217 | 35 36 1.0 218 | 35 42 1.0 219 | 35 47 1.0 220 | 35 49 1.0 221 | 36 37 1.0 222 | 36 38 1.0 223 | 36 41 1.0 224 | 36 43 1.0 225 | 36 47 1.0 226 | 37 41 1.0 227 | 37 48 1.0 228 | 38 45 1.0 229 | 38 48 1.0 230 | 39 50 1.0 231 | 40 41 1.0 232 | 40 42 1.0 233 | 40 50 1.0 234 | 41 42 1.0 235 | 41 45 1.0 236 | 41 47 1.0 237 | 42 47 1.0 238 | 43 44 1.0 239 | 43 46 1.0 240 | 43 49 1.0 241 | 44 47 1.0 242 | 50 50 1.0 243 | 45 49 1.0 244 | 46 49 1.0 245 | 47 48 1.0 246 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/mllib/clustering/MCLModel.scala: -------------------------------------------------------------------------------- 1 | /*The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2016, Joan André 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE.*/ 22 | 23 | package org.apache.spark.mllib.clustering 24 | 25 | import org.apache.spark.SparkContext 26 | import org.apache.spark.mllib.util.{Loader, Saveable} 27 | import org.apache.spark.sql.functions._ 28 | import org.apache.spark.sql.{Dataset, Row, SparkSession} 29 | import org.json4s.JsonDSL._ 30 | import org.json4s._ 31 | import org.json4s.jackson.JsonMethods._ 32 | 33 | /** A clustering model for MCL. 
34 | * 35 | * @param assignments an RDD of clustering assignments 36 | * @todo complete save and load features 37 | */ 38 | 39 | class MCLModel(var assignments: Dataset[Assignment]) extends Saveable with Serializable{ 40 | 41 | /** Get number of clusters.*/ 42 | def nbClusters: Int = assignments 43 | .groupBy("cluster") 44 | .agg(collect_list(col("id"))) 45 | .collect.length 46 | 47 | /** 48 | * Save MCL clusters assignments 49 | * 50 | * @param sc current Spark Context 51 | * @param path location where MCL model is saved 52 | */ 53 | 54 | override def save(sc: SparkContext, path: String): Unit = { 55 | MCLModel.SaveLoadV1_0.save(sc, this, path) 56 | } 57 | 58 | override protected def formatVersion: String = "1.0" 59 | } 60 | 61 | object MCLModel extends Loader[MCLModel]{ 62 | 63 | /** Load MCL clusters assignments 64 | * 65 | * @param sc current Spark Context 66 | * @param path location where MCL model is saved 67 | */ 68 | 69 | override def load(sc: SparkContext, path: String): MCLModel = { 70 | MCLModel.SaveLoadV1_0.load(sc, path) 71 | } 72 | 73 | private[clustering] 74 | object SaveLoadV1_0 { 75 | 76 | private val thisFormatVersion = "1.0" 77 | 78 | private[clustering] 79 | val thisClassName = "org.apache.spark.mllib.clustering.MCLModel" 80 | 81 | def save(sc: SparkContext, model: MCLModel, path: String): Unit = { 82 | val metadata = compact(render( 83 | ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) 84 | )) 85 | sc.parallelize(Seq(metadata), 1).saveAsTextFile(Loader.metadataPath(path)) 86 | 87 | model.assignments.write.parquet(Loader.dataPath(path)) 88 | } 89 | 90 | def load(sc: SparkContext, path: String): MCLModel = { 91 | implicit val formats = DefaultFormats 92 | val spark = SparkSession.builder().getOrCreate() 93 | import spark.implicits._ 94 | 95 | val (className, formatVersion, metadata) = Loader.loadMetadata(sc, path) 96 | assert(className == thisClassName) 97 | assert(formatVersion == thisFormatVersion) 98 | 99 | /*val expansionRate = (metadata \ "expansionRate").extract[Double] 100 | val inflationRate = (metadata \ "inflationRate").extract[Double] 101 | val epsilon = (metadata \ "epsilon").extract[Double] 102 | val maxIterations = (metadata \ "maxIterations").extract[Int]*/ 103 | 104 | val assignments = spark.read.parquet(Loader.dataPath(path)) 105 | // Check if loading file respects Assignment class schema 106 | Loader.checkSchema[Assignment](assignments.schema) 107 | val certifiedAssignments = assignments.map { 108 | case Row(id: Long, cluster: Long) => Assignment(id, cluster) 109 | } 110 | 111 | new MCLModel(certifiedAssignments) 112 | } 113 | } 114 | } 115 | 116 | /** List which point belongs to which cluster 117 | * 118 | * @param id node id 119 | * @param cluster cluster id 120 | */ 121 | 122 | case class Assignment(id: Long, cluster: Long) 123 | 124 | /** Factory for [[MCLModel.assignments]] instances. 
*/ 125 | private object Assignment { 126 | 127 | /** Creates an assignment with a given node id and a given cluster id 128 | * 129 | * @param r a row with two columns: one for node id and one for cluster id 130 | */ 131 | def apply(r: Row): Assignment = { 132 | Assignment(r.getLong(0), r.getLong(1)) 133 | } 134 | } -------------------------------------------------------------------------------- /src/test/resources/MCLUtils/OrientedMatrixSelfLoop.txt: -------------------------------------------------------------------------------- 1 | 1.0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;1.0;0;1.0;1.0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;1.0;0;1.0;1.0;0;0;0;0 2 | 0;1.0;0;0;0;0;1.0;0;0;0;1.0;1.0;1.0;0;0;0;0;1.0;0;0;0;0;0;1.0;0;0;0;1.0;0;0;1.0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0 3 | 0;0;1.0;0;0;0;0;0;1.0;0;0;0;1.0;0;1.0;0;0;0;0;0;1.0;0;1.0;1.0;0;0;0;0;1.0;1.0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;0;0;0;0;0;0;0;1.0 4 | 0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;0;0;0;0;1.0;1.0;0;1.0;1.0;0 5 | 0;0;0;0;1.0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;0;1.0;0;0;1.0;0;0;0;1.0;0;1.0;0;0;0;0;0;1.0;0;1.0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0 6 | 0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;1.0;0;0;1.0;0;1.0;0;0;0;0;0;0;0;1.0;0;1.0;0;0;0;0;0;1.0;0;0;0;0;0;0 7 | 0;0;0;0;0;0;1.0;1.0;0;0;0;1.0;0;0;1.0;0;1.0;0;0;1.0;0;1.0;0;0;1.0;1.0;0;1.0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0 8 | 0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;1.0;0;0;0;0;0;0;1.0;0;1.0;1.0;1.0;0;0;0;0;0;0;0;0;0;0;0 9 | 0;0;0;0;0;0;0;0;1.0;1.0;1.0;0;0;0;0;0;1.0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;0 10 | 0;0;0;0;0;0;0;0;0;1.0;0;0;1.0;0;0;0;0;0;0;0;1.0;1.0;0;1.0;0;0;0;1.0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;1.0 11 | 0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;1.0;0;0;0;1.0;0;1.0;0;1.0;1.0;0;0;0;0;0;0;1.0;0;1.0;1.0;0;0;0;0;0;0;1.0;0;0;0;0;0;0 12 | 0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;0;0;1.0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;1.0 13 | 0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;0;1.0;0;0;0;1.0;0;0;0;0;0;1.0;0;1.0;0;0;0;0;0;0;0;1.0;0;1.0;0;0;0;0;1.0;0;0;0;1.0 14 | 0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;1.0;0;0;1.0;0;1.0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0 15 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;0;1.0;0;0;1.0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;1.0;1.0;0;0;0 16 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;1.0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;0;1.0;0;0;0;1.0;1.0;0;0;0;0;1.0 17 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;1.0;0;0;0;1.0;0;1.0;1.0;1.0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;0;0;0 18 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;1.0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;1.0;0;0;1.0;0;0;0;0;0;0;0;0;0;0 19 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;1.0;1.0;0;1.0;0;0;0;1.0;0;0;1.0;0;0;0;0 20 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;0;0;1.0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;1.0;0;1.0;0;0;0 21 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0 22 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;1.0;0;1.0;1.0;0;1.0;0;1.0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0 23 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;1.0;1.0;1.0;0;0;0;1.0;1.0;1.0;0;0;0;0;0 24 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;1.0;0;0;0 25 | 
0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0 26 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;0;0;0;1.0;1.0;1.0;0;0;0;0;0;0;1.0;1.0;0;0;0;0;0;0;0 27 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;1.0;0;1.0;1.0;0;1.0;0;0;1.0;0;0;1.0;0;0;1.0;0;0;0;0;0;0 28 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;1.0;0;0;0;0 29 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;1.0;1.0;0;0;0;0;1.0;0;0;1.0;0;0;0;0;0;0;0;0;0;0 30 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0 31 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0 32 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;0;0;1.0;0;0;0;0;0;1.0;0;1.0;0;0 33 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;1.0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;1.0 34 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;0;0;0;1.0;0;1.0;0;0;1.0;0;0;0 35 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;0;0;0;1.0;0;0;0;0;1.0;0;1.0;0 36 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;1.0;0;0;1.0;0;1.0;0;0;0;1.0;0;0;0 37 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;0;0;0;0;0;0;1.0;0;0 38 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;1.0;0;0;1.0;0;0 39 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;0;0;0;0;0;0;1.0 40 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;1.0;0;0;0;0;0;0;0;1.0 41 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0;1.0;0;1.0;0;0;0 42 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;0;1.0;0;0;0 43 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;1.0;0;0;1.0;0 44 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;1.0;0;0;0 45 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;0;1.0;0 46 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0;1.0;0 47 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;1.0;0;0 48 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0;0 49 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0;0 50 | 0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1.0 51 | -------------------------------------------------------------------------------- /src/main/scala/Main.scala: -------------------------------------------------------------------------------- 1 | /*The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2016, Joan André 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to 
whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE.*/ 22 | 23 | // Import required spark classes 24 | 25 | import org.apache.log4j.{Level, Logger} 26 | import org.apache.spark.graphx._ 27 | import org.apache.spark.mllib.clustering.{Assignment, MCL} 28 | import org.apache.spark.rdd.RDD 29 | import org.apache.spark.{SparkConf, SparkContext} 30 | import org.apache.spark.sql.Dataset 31 | import org.apache.spark.sql.functions._ 32 | 33 | /** Define main method for a start-up example*/ 34 | object Main { 35 | 36 | // Disable Spark messages when running program 37 | Logger.getLogger("org").setLevel(Level.OFF) 38 | Logger.getLogger("akka").setLevel(Level.OFF) 39 | 40 | // Guide for users who want to run MCL program 41 | val usage = """ 42 | Usage: mcl [--expansionRate num] [--inflationRate num] [--epsilon num] [--maxIterations num] [--selfLoopWeight num] [--graphOrientationStrategy string] 43 | """ 44 | 45 | type OptionMap = Map[Symbol, Any] 46 | 47 | def toInt(key: Symbol, s: String): Int = { 48 | try { 49 | s.toInt 50 | } catch { 51 | case e: Exception => throw new Exception("\n" + key.toString() + " must be an integer") 52 | } 53 | } 54 | 55 | def toDouble(key: Symbol, s: String): Double = { 56 | try { 57 | s.toDouble 58 | } catch { 59 | case e: Exception => throw new Exception("\n" + key.toString() + " must be a double") 60 | } 61 | } 62 | 63 | def nextOption(map : OptionMap, list: List[String]) : OptionMap = { 64 | list match { 65 | case Nil => map 66 | case "--expansionRate" :: value :: tail => 67 | nextOption(map ++ Map('expansionRate -> value), tail) 68 | case "--inflationRate" :: value :: tail => 69 | nextOption(map ++ Map('inflationRate -> value), tail) 70 | case "--epsilon" :: value :: tail => 71 | nextOption(map ++ Map('epsilon -> value), tail) 72 | case "--maxIterations" :: value :: tail => 73 | nextOption(map ++ Map('maxIterations -> value), tail) 74 | case "--selfLoopWeight" :: value :: tail => 75 | nextOption(map ++ Map('selfLoopWeight -> value), tail) 76 | case "--graphOrientationStrategy" :: value :: tail => 77 | nextOption(map ++ Map('graphOrientationStrategy -> value), tail) 78 | case option :: tail => throw new Exception("\nUnknown option " + option) 79 | } 80 | } 81 | 82 | def main(args: Array[String]) { 83 | 84 | // Manage options for the program 85 | if (args.length == 0) println(usage) 86 | val arglist = args.toList 87 | 88 | try{ 89 | val options = nextOption(Map(),arglist) 90 | val expansionRate:Int = toInt('expansionRate, options.getOrElse('expansionRate, 2).toString) 91 | val inflationRate:Double = toDouble('inflationRate, options.getOrElse('inflationRate, 2.0).toString) 92 | val epsilon:Double = toDouble('epsilon, options.getOrElse('epsilon, 0.01).toString) 93 | val maxIterations:Int = toInt('maxIterations, options.getOrElse('maxIterations, 10).toString) 94 | val selfLoopWeight:Double = 
toDouble('selfLoopWeight, options.getOrElse('selfLoopWeight, 1.0).toString) 95 | val graphOrientationStrategy:String = options.getOrElse('graphOrientationStrategy, "undirected").toString 96 | 97 | // Initialise spark context 98 | val conf = new SparkConf() 99 | .setMaster("local[*]") 100 | .set("spark.driver.memory", "1g") 101 | .set("spark.executor.memory", "1g") 102 | .setAppName("MCL") 103 | 104 | val sc = new SparkContext(conf) 105 | 106 | // Create and RDD for vertices 107 | val users: RDD[(VertexId, String)] = 108 | sc.parallelize(Array((0L,"Node1"), (1L,"Node2"), 109 | (2L,"Node3"), (3L,"Node4"),(4L,"Node5"), 110 | (5L,"Node6"), (6L,"Node7"), (7L, "Node8"), 111 | (8L, "Node9"), (9L, "Node10"), (10L, "Node11"))) 112 | 113 | // Create an RDD for edges 114 | val relationships: RDD[Edge[Double]] = 115 | sc.parallelize( 116 | Seq(Edge(0, 1, 1.0), Edge(1, 0, 1.0), 117 | Edge(0, 2, 1.0), Edge(2, 0, 1.0), 118 | Edge(0, 3, 1.0), Edge(3, 0, 1.0), 119 | Edge(1, 2, 1.0), Edge(2, 1, 1.0), 120 | Edge(1, 3, 1.0), Edge(3, 1, 1.0), 121 | Edge(2, 3, 1.0), Edge(3, 2, 1.0), 122 | Edge(4, 5, 1.0), Edge(5, 4, 1.0), 123 | Edge(4, 6, 1.0), Edge(6, 4, 1.0), 124 | Edge(4, 7, 1.0), Edge(7, 4, 1.0), 125 | Edge(5, 6, 1.0), Edge(6, 5, 1.0), 126 | Edge(5, 7, 1.0), Edge(7, 5, 1.0), 127 | Edge(6, 7, 1.0), Edge(7, 6, 1.0), 128 | Edge(3, 8, 1.0), Edge(8, 3, 1.0), 129 | Edge(9, 8, 1.0), Edge(8, 9, 1.0), 130 | Edge(9, 10, 1.0), Edge(10, 9, 1.0), 131 | Edge(4, 10, 1.0), Edge(10, 4, 1.0) 132 | )) 133 | 134 | // Build the initial Graph 135 | val graph = Graph(users, relationships) 136 | 137 | // Run MCL algorithm and get nodes assignments to generated clusters 138 | val clusters: Dataset[Assignment] = 139 | MCL.train( 140 | graph, 141 | expansionRate, 142 | inflationRate, 143 | epsilon, 144 | maxIterations, 145 | selfLoopWeight, 146 | graphOrientationStrategy) 147 | .assignments 148 | 149 | clusters 150 | .groupBy("cluster") 151 | .agg(sort_array(collect_list(col("id")))) 152 | .show(3) 153 | 154 | // Terminate spark context 155 | sc.stop() 156 | 157 | } 158 | catch{ 159 | case e: Exception => println(e.getMessage) 160 | sys.exit(1) 161 | } 162 | } 163 | } -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/mllib/clustering/MCLUtils.scala: -------------------------------------------------------------------------------- 1 | /*The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2016, Joan André 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE.*/ 22 | 23 | package org.apache.spark.mllib.clustering 24 | 25 | import breeze.linalg.max 26 | import org.apache.spark.graphx._ 27 | import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, IndexedRow, IndexedRowMatrix} 28 | import org.apache.spark.mllib.linalg.{SparseVector, Vectors} 29 | import org.apache.spark.rdd.RDD 30 | import org.apache.spark.sql.{DataFrame, Row} 31 | import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} 32 | import org.apache.spark.sql.types._ 33 | 34 | /** 35 | * Utils functions for MCL algorithm implementation. 36 | */ 37 | object MCLUtils { 38 | 39 | /** Print an adjacency matrix in nice format. 40 | * 41 | * @param mat an adjacency matrix 42 | */ 43 | def displayMatrix(mat: IndexedRowMatrix): Unit={ 44 | println() 45 | mat 46 | .rows.sortBy(_.index).collect() 47 | .foreach(row => { 48 | printf(row.index + " => ") 49 | row.vector.toArray 50 | .foreach(v => printf(",%.4f", v)) 51 | println() 52 | }) 53 | } 54 | 55 | def displayBlockMatrix(mat: BlockMatrix): Unit={ 56 | println() 57 | mat 58 | .blocks.sortBy(_._1).collect() 59 | .foreach( 60 | block => { 61 | printf(block._2.toString()) 62 | }) 63 | } 64 | 65 | /** Get a suitable graph for MCL model algorithm. 66 | * 67 | * Each vertex id in the graph corresponds to a row id in the adjacency matrix. 68 | * 69 | * @param graph original graph 70 | * @param lookupTable a matching table with nodes ids and new ordered ids 71 | * @return prepared graph for MCL algorithm 72 | */ 73 | def preprocessGraph[VD](graph: Graph[VD, Double], lookupTable: DataFrame): Graph[Int, Double]={ 74 | val newVertices: RDD[(VertexId, Int)] = 75 | lookupTable.rdd.map( 76 | row => (row.getInt(1).toLong, row.getInt(0)) 77 | ) 78 | 79 | Graph(newVertices, graph.edges) 80 | .groupEdges((e1,e2) => e1 + e2) 81 | } 82 | 83 | /** Deal with self loop 84 | * 85 | * Add one when weight is nil and remain as it is otherwise 86 | * 87 | * @param graph original graph 88 | * @param selfLoopWeight a coefficient between 0 and 1 to influence clustering granularity and objective 89 | * @return an RDD of self loops weights and associated coordinates. 90 | */ 91 | def selfLoopManager(graph: Graph[Int, Double], selfLoopWeight: Double): RDD[(Int, (Int, Double))] = { 92 | 93 | val graphWithLinkedEdges: Graph[Array[Edge[Double]], Double] = 94 | Graph( 95 | graph 96 | .collectEdges(EdgeDirection.Either), 97 | graph.edges 98 | ) 99 | 100 | val selfLoop:RDD[(Int, (Int, Double))] = 101 | graph 102 | .triplets 103 | .filter(e => e.srcId==e.dstId && e.attr > 0) 104 | .map(e => (e.srcId, e.srcAttr)) 105 | .fullOuterJoin(graph.vertices) 106 | .filter(join => join._2._1.isEmpty) 107 | .leftOuterJoin(graphWithLinkedEdges.vertices) 108 | .map(v => 109 | (v._2._1._2.get, 110 | (v._2._1._2.get, 111 | v._2._2.getOrElse(Array(Edge(1.0.toLong, 1.0.toLong, 1.0))).map(e => e.attr).max*selfLoopWeight) 112 | ) 113 | ) 114 | 115 | selfLoop 116 | } 117 | 118 | /** Deal with multiple adjacency matrix filling strategy depending on graph orientation 119 | * 120 | * @param graph original graph 121 | * @param graphOrientationStrategy chose a graph strategy completion depending on its nature. 3 choices: undirected, directed, birected. 
122 | * @return an RDD of new edge weights and associated coordinates. 123 | */ 124 | def graphOrientationManager(graph: Graph[Int, Double], graphOrientationStrategy: String): RDD[(Int, (Int, Double))] = { 125 | 126 | graphOrientationStrategy match { 127 | 128 | //Undirected Graph Solution 129 | case "undirected" => 130 | 131 | graph.triplets.map( 132 | triplet => (triplet.srcAttr, (triplet.dstAttr, triplet.attr)) 133 | ) 134 | 135 | //Directed Graph Solution => with only one possible orientation per edge 136 | case "directed" => 137 | 138 | graph.triplets.flatMap( 139 | triplet => { 140 | if (triplet.srcAttr != triplet.dstAttr) { 141 | Array((triplet.srcAttr, (triplet.dstAttr, triplet.attr)), (triplet.dstAttr, (triplet.srcAttr, triplet.attr))) 142 | } 143 | else { 144 | Array((triplet.srcAttr, (triplet.dstAttr, triplet.attr))) 145 | } 146 | } 147 | ) 148 | 149 | //Bidirected Graph Solution => some edges may already exist in both orientations 150 | case "bidirected" => 151 | 152 | val tempEntries: RDD[((Int, Int), (Double, Int))] = graph.triplets.flatMap( 153 | triplet => { 154 | Array( 155 | ((triplet.srcAttr, triplet.dstAttr), (triplet.attr, 1)), 156 | ((triplet.dstAttr, triplet.srcAttr), (triplet.attr, 2)) 157 | ) 158 | } 159 | ) 160 | 161 | tempEntries 162 | .groupByKey() 163 | .map( 164 | e => 165 | if(e._2.size > 1){ 166 | val value = e._2.filter(v => v._2 == 1).head._1 167 | (e._1._1, (e._1._2, value)) 168 | } 169 | else{ 170 | (e._1._1, (e._1._2, e._2.head._1)) 171 | } 172 | ) 173 | } 174 | } 175 | 176 | /** Transform a Graph into an IndexedRowMatrix 177 | * 178 | * @param graph original graph 179 | * @param selfLoopWeight a coefficient between 0 and 1 to influence clustering granularity and objective 180 | * @param graphOrientationStrategy choose a graph completion strategy depending on its nature. 3 choices: undirected, directed, bidirected. 181 | * @return a ready adjacency matrix for the MCL process.
182 | * @todo Check graphOrientationStrategy choice for current graph 183 | */ 184 | def toIndexedRowMatrix(graph: Graph[Int, Double], selfLoopWeight: Double, graphOrientationStrategy: String): IndexedRowMatrix = { 185 | 186 | //Especially relationships values have to be checked before doing what follows 187 | val rawEntries: RDD[(Int, (Int, Double))] = graphOrientationManager(graph, graphOrientationStrategy) 188 | 189 | val numOfNodes:Int = graph.numVertices.toInt 190 | 191 | val selfLoop:RDD[(Int, (Int, Double))] = selfLoopManager(graph, selfLoopWeight) 192 | val entries:RDD[(Int, (Int, Double))] = rawEntries.union(selfLoop) 193 | 194 | val indexedRows = entries.groupByKey().map(e => 195 | IndexedRow(e._1, Vectors.sparse(numOfNodes, e._2.toSeq)) 196 | ) 197 | 198 | new IndexedRowMatrix(indexedRows) 199 | } 200 | 201 | /** Transform an IndexedRowMatrix into a Graph 202 | * 203 | * @param mat an adjacency matrix 204 | * @param vertices vertices of original graph 205 | * @return associated graph 206 | */ 207 | def toGraph(mat: IndexedRowMatrix, vertices: RDD[(VertexId, String)]): Graph[String, Double] = { 208 | val edges: RDD[Edge[Double]] = 209 | mat.rows.flatMap(f = row => { 210 | val svec: SparseVector = row.vector.toSparse 211 | val it:Range = svec.indices.indices 212 | it.map(ind => Edge(row.index, svec.indices.apply(ind), svec.values.apply(ind))) 213 | }) 214 | Graph(vertices, edges) 215 | } 216 | 217 | 218 | 219 | } 220 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [](https://travis-ci.org/joandre/MCL_spark) 2 | [](https://codecov.io/gh/joandre/MCL_spark) 3 | 4 | # MCL Spark 5 | 6 | **License:** [MIT](https://github.com/joandre/MCL_spark/blob/master/LICENSE.txt) 7 | 8 | **MCL Spark** is an experimental project which goal is to implement a graph clustering algorithm in [Spark](https://github.com/apache/spark), using especially distributed matrix tools embedded in the scala API. 9 | 10 | Why MCL algorithm? Because it responds to Spark MLLib [contribution policy](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-MLlib-specificContributionGuidelines) first four points: 11 | * Be widely known 12 | * Be used and accepted 13 | * Be highly scalable 14 | * Be well documented 15 | 16 | Please do not hesitate to post comments or questions. 17 | 18 | Most of the following content is based on Stijn van Dongen website (http://micans.org/mcl/). 
19 | 20 | Table of Contents 21 | ================= 22 | 23 | * [MCL Spark](#mcl-spark) 24 | * [Getting Started](#getting-started) 25 | * [Online Documentation](#online-documentation) 26 | * [Requirements](#requirements) 27 | * [Building From Sources](#building-from-sources) 28 | * [Use embarked example](#use-embarked-example) 29 | * [Parameters choices](#parameters-choices) 30 | * [MCL (Markov Cluster) algorithm theory](#mcl-markov-cluster-algorithm-theory) 31 | * [Expansion](#expansion) 32 | * [Inflation](#inflation) 33 | * [Convergence and clusters interpretation](#convergence-and-clusters-interpretation) 34 | * [Optimizations](#optimizations) 35 | * [Implementation thoughts](#implementation-thoughts) 36 | * [Spark matrices universe](#spark-matrices-universe) 37 | * [IndexedRowMatrix](#indexedrowmatrix) 38 | * [BlockMatrix](#blockmatrix) 39 | * [Directed graph management](#directed-graph-management) 40 | * [Hypergraph](#hypergraph) 41 | * [References](#references) 42 | 43 | ## Getting Started 44 | 45 | ### Online Documentation 46 | 47 | A Scaladoc is available [here](http://joandre.github.io/docs/MCL_Spark/api/). 48 | 49 | ### Requirements 50 | 51 | * JDK 1.8 or higher 52 | * SBT 0.13.9 (see http://www.scala-sbt.org/download.html for more information) 53 | * Build against Spark 1.6.1+ 54 | 55 | ### Building From Sources 56 | 57 | This library is built with SBT. To build a JAR file simply run "sbt package" from the project root. Currently project was built under scala 2.10.5. 58 | 59 | ### Use embarked example 60 | 61 | ``` 62 | 63 | $MCL_SPARK_HOME/sbt "run [--expansionRate num] [--inflationRate num] [--epsilon num] [--maxIterations num] [--selfLoopWeight num] [--graphOrientationStrategy string]" 64 | 65 | ``` 66 | 67 | ### Import MCL into your Spark Shell 68 | 69 | ``` 70 | 71 | $SPARK_HOME/bin/spark-shell --jars $MCL_SPARK_HOME/target/scala-2.11/mcl_spark_2.11-1.0.0.jar 72 | 73 | ``` 74 | 75 | Then use MCL as follows: 76 | 77 | ``` 78 | import org.apache.spark.graphx._ 79 | import org.apache.spark.mllib.clustering.{Assignment, MCL} 80 | import org.apache.spark.rdd.RDD 81 | import org.apache.spark.sql.Dataset 82 | import org.apache.spark.sql.functions.{sort_array,collect_list,col} 83 | 84 | val users: RDD[(VertexId, String)] = 85 | sc.parallelize(Array((0L,"Node1"), (1L,"Node2"), 86 | (2L,"Node3"), (3L,"Node4"),(4L,"Node5"), 87 | (5L,"Node6"), (6L,"Node7"), (7L, "Node8"), 88 | (8L, "Node9"), (9L, "Node10"), (10L, "Node11"))) 89 | 90 | // Create an RDD for edges 91 | val relationships: RDD[Edge[Double]] = 92 | sc.parallelize( 93 | Seq(Edge(0, 1, 1.0), Edge(1, 0, 1.0), 94 | Edge(0, 2, 1.0), Edge(2, 0, 1.0), 95 | Edge(0, 3, 1.0), Edge(3, 0, 1.0), 96 | Edge(1, 2, 1.0), Edge(2, 1, 1.0), 97 | Edge(1, 3, 1.0), Edge(3, 1, 1.0), 98 | Edge(2, 3, 1.0), Edge(3, 2, 1.0), 99 | Edge(4, 5, 1.0), Edge(5, 4, 1.0), 100 | Edge(4, 6, 1.0), Edge(6, 4, 1.0), 101 | Edge(4, 7, 1.0), Edge(7, 4, 1.0), 102 | Edge(5, 6, 1.0), Edge(6, 5, 1.0), 103 | Edge(5, 7, 1.0), Edge(7, 5, 1.0), 104 | Edge(6, 7, 1.0), Edge(7, 6, 1.0), 105 | Edge(3, 8, 1.0), Edge(8, 3, 1.0), 106 | Edge(9, 8, 1.0), Edge(8, 9, 1.0), 107 | Edge(9, 10, 1.0), Edge(10, 9, 1.0), 108 | Edge(4, 10, 1.0), Edge(10, 4, 1.0) 109 | )) 110 | 111 | // Build the initial Graph 112 | val graph = Graph(users, relationships) 113 | graph.cache() 114 | 115 | val clusters: Dataset[Assignment] = 116 | MCL.train(graph).assignments 117 | clusters 118 | .groupBy("cluster") 119 | .agg(sort_array(collect_list(col("id")))) 120 | .show(3) 121 | 122 | ``` 123 | 124 | ### 
Parameters choices 125 | 126 | **Inflation and Expansion rates** => These two parameters influence what we call cluster granularity, i.e. how many groups of nodes are detected and how strong they are. Inflation strengthens intra-cluster links and weakens inter-cluster links, while expansion connects nodes to further, new parts of the graph. **Default = 2** 127 | 128 | 1. A high inflation rate strengthens existing clusters. 129 | 2. A high expansion rate encourages clusters to merge. 130 | 131 | Nota bene: Only integers are accepted for the expansion rate for now (for computational reasons). 132 | 133 | **Epsilon** => In order to keep the adjacency matrix associated with our graph sparse, one strategy is to remove edges whose weight is negligible. Say you choose an epsilon equal to 0.05: every edge attached to a node whose weight is less than 5% of the total weight of the edges attached to that node is removed (see the Optimizations paragraph for more details). **Default = 0.01** 134 | 135 | **Maximum number of iterations** => Forces MCL to stop before it converges. According to Stijn van Dongen's recommendations, a steady state is usually reached after 10 iterations. **Default = 10** 136 | 137 | **Self loops weight management** => A percentage of the maximum weight can be applied to the added self loops. For example, for a binary graph, 1 is the maximum weight to allocate (see the Optimizations paragraph for more details). **Default = 0.1** 138 | 139 | **Directed and undirected graphs management** => How to deal with directed graphs. **Default = "undirected"** 140 | 141 | 1. "undirected": the graph is assumed to be undirected. No edges are added. 142 | 2. "directed": the graph is assumed to be directed. The inverse of each edge is added so that the graph becomes undirected. 143 | 3. "bidirected": the graph already contains bidirected edges. Except for edges that already exist in both directions, the inverse of each edge is added so that the graph becomes undirected. 144 | 145 | See [Implementation thoughts](#implementation-thoughts) for more details. 146 | 147 | ## MCL (Markov Cluster) algorithm theory 148 | 149 | ### Recall about Markov chains 150 | 151 | *"A Markov chain is a sequence of random variables X1, X2, X3, ... with the Markov property, namely that the probability of moving to the next state depends only on the present state and not on the previous states."* ([wikipedia definition](https://en.wikipedia.org/wiki/Markov_chain#Formal_definition)) 152 | 153 | **Definition**: a state is absorbing when it cannot be left. 154 | 155 | **Definition**: a Markov chain is aperiodic if at least one of its states has a period of 1, so returning to the original state occurs irregularly. 156 | 157 | **Definition**: a Markov chain is irreducible if it is possible to get to any state from any state. 158 | 159 | **Definition**: a Markov chain is ergodic if it is both aperiodic and irreducible. 160 | 161 | ### Principle 162 | 163 | To detect clusters inside a graph, the MCL algorithm uses a Column Stochastic Matrix representation of the graph and the concept of random walks. The idea is that random walks between two nodes belonging to the same group are more frequent than random walks between two nodes belonging to different groups. So we compute the probability that a node reaches every other node of the graph to get a better picture of the clusters. 164 | 165 | **Definition**: a Column Stochastic Matrix (CSM) is a non-negative matrix whose columns each sum to 1.
In our case, we prefer a Row Stochastic Matrix (RSM) to a CSM in order to use the Spark API tools (see Implementation thoughts for more details). 166 | 167 | Two steps are needed to simulate random walks on a graph: expansion and inflation. Each step is associated with a specific rate (eR and iR respectively). In the following formulas, n is the number of nodes in the graph. 168 | 169 | ### Expansion 170 | To perform **expansion**, we raise the stochastic matrix to the power eR using the normal matrix product. 171 | 172 |
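For intuition, here is a minimal Scala sketch of the expansion step on a distributed matrix using Spark's `BlockMatrix` multiplication. It is not the exact code used in MCL Spark; `adjacency` (the normalized adjacency matrix built for the graph) and `eR` (the expansion rate) are assumed inputs.

```
import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, IndexedRowMatrix}

// Expansion sketch: raise the row-stochastic adjacency matrix to the power eR.
// `adjacency` and `eR` are assumed to be provided by the caller.
def expand(adjacency: IndexedRowMatrix, eR: Int): BlockMatrix = {
  val m: BlockMatrix = adjacency.toBlockMatrix().cache()
  // Multiply the matrix by itself (eR - 1) times: m^eR
  (1 until eR).foldLeft(m)((acc, _) => acc.multiply(m))
}
```

Since the product of two row-stochastic matrices is itself row-stochastic, the result of this step can be fed directly into the inflation step.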