├── run ├── dataset_example └── config_example ├── .gitignore ├── src ├── cracker │ ├── CrackerMessageSize.scala │ ├── CrackerMainJava.java │ ├── CrackerMessagePropagation.scala │ ├── CrackerMessageRedPhase.scala │ ├── CrackerMessageIdentification.scala │ ├── CrackerMessageTree.scala │ ├── CrackerStats.scala │ ├── CrackerMain.scala │ └── CrackerAlgorithm.scala ├── ccf │ ├── CcfMessage.scala │ ├── CcfMainJava.java │ └── CcfMain.scala ├── ccmr │ ├── CcmrMainJava.java │ ├── CcmrMessage.scala │ └── CcmrMain.scala ├── sgc │ ├── SGCMainJava.java │ ├── SGCMessage.scala │ └── SGCMain.scala ├── hashMin │ ├── HashMinMainJava.java │ ├── HashMinMessage.scala │ └── HashMinMain.scala ├── hashToMin │ ├── HashToMinMainJava.java │ ├── HashToMinMessage.scala │ └── HashToMinMain.scala ├── alternating │ ├── AlternatingMainJava.java │ ├── AlternatingStats.scala │ ├── AlternatingMain.scala │ └── AlternatingAlgorithm.scala ├── crackerAllOptimizations │ ├── CrackerMainJava.java │ └── CrackerAllOptimizationsMain.scala ├── alternatingOptimized │ ├── AlternatingOptimizedMainJava.java │ ├── AlternatingMessage.scala │ └── AlternatingOptimizedMain.scala └── util │ ├── Main.java │ ├── CCPropertiesImmutable.scala │ ├── CCProperties.scala │ ├── CCUtilIO.scala │ └── CCUtil.scala ├── LICENSE ├── LICENSE.txt ├── README.md └── pom.xml /run/dataset_example: -------------------------------------------------------------------------------- 1 | 1 2 2 | 1 3 3 | 2 4 4 | 3 5 5 | 4 6 6 | 7 8 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | /bin/ 3 | .classpath 4 | .project 5 | .settings 6 | .idea 7 | cracker.iml 8 | -------------------------------------------------------------------------------- /src/cracker/CrackerMessageSize.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | trait CrackerMessageSize { 5 | def getMessageSize : Long 6 | } -------------------------------------------------------------------------------- /src/ccf/CcfMessage.scala: -------------------------------------------------------------------------------- 1 | package ccf 2 | 3 | @serializable 4 | class CcfMessage (val cc: Set[Int], val terminate : Boolean) 5 | { 6 | def voteToHalt : Boolean = terminate 7 | } -------------------------------------------------------------------------------- /src/ccf/CcfMainJava.java: -------------------------------------------------------------------------------- 1 | package ccf; 2 | 3 | public class CcfMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | CcfMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/ccmr/CcmrMainJava.java: -------------------------------------------------------------------------------- 1 | package ccmr; 2 | 3 | public class CcmrMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | CcmrMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/sgc/SGCMainJava.java: -------------------------------------------------------------------------------- 1 | package sgc; 2 | 3 | public class SGCMainJava 4 | { 5 | public static void main(final String[] args_) 6 | { 7 | SGCMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/cracker/CrackerMainJava.java: 
-------------------------------------------------------------------------------- 1 | package cracker; 2 | 3 | public class CrackerMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | CrackerTreeMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/hashMin/HashMinMainJava.java: -------------------------------------------------------------------------------- 1 | package hashMin; 2 | 3 | public class HashMinMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | HashMinMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/hashToMin/HashToMinMainJava.java: -------------------------------------------------------------------------------- 1 | package hashToMin; 2 | 3 | public class HashToMinMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | HashToMinMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/alternating/AlternatingMainJava.java: -------------------------------------------------------------------------------- 1 | package alternating; 2 | 3 | public class AlternatingMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | AlternatingMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/cracker/CrackerMessagePropagation.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | class CrackerTreeMessagePropagation (val min : Long, val child : Set[Long]) extends CrackerMessageSize 5 | { 6 | def getMessageSize = child.size + 1 7 | } -------------------------------------------------------------------------------- /src/ccmr/CcmrMessage.scala: -------------------------------------------------------------------------------- 1 | package ccmr 2 | 3 | import scala.collection.immutable.TreeSet 4 | 5 | @serializable 6 | class CcmrMessage (val cc: TreeSet[Long], val iterationNeeded : Boolean) 7 | { 8 | def voteToHalt : Boolean = !iterationNeeded 9 | } -------------------------------------------------------------------------------- /src/crackerAllOptimizations/CrackerMainJava.java: -------------------------------------------------------------------------------- 1 | package crackerAllOptimizations; 2 | 3 | public class CrackerMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | CrackerAllOptimizationsMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/sgc/SGCMessage.scala: -------------------------------------------------------------------------------- 1 | package sgc 2 | 3 | import scala.collection.immutable.TreeSet 4 | 5 | @serializable 6 | class HashToMinMessage (val min: Long, val cc: Set[Long], val sizeBefore : Long) 7 | { 8 | def voteToHalt : Boolean = sizeBefore == cc.size 9 | } -------------------------------------------------------------------------------- /src/alternatingOptimized/AlternatingOptimizedMainJava.java: -------------------------------------------------------------------------------- 1 | package alternatingOptimized; 2 | 3 | public class AlternatingOptimizedMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | AlternatingOptimizedMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/hashMin/HashMinMessage.scala: 
-------------------------------------------------------------------------------- 1 | package hashMin 2 | 3 | import scala.collection.immutable.TreeSet 4 | 5 | @serializable 6 | class HashMinMessage (val min: Long, val minBefore : Long, val neigh : Set[Long]) 7 | { 8 | def voteToHalt : Boolean = minBefore == min 9 | } -------------------------------------------------------------------------------- /src/hashToMin/HashToMinMessage.scala: -------------------------------------------------------------------------------- 1 | package hashToMin 2 | 3 | import scala.collection.immutable.TreeSet 4 | 5 | @serializable 6 | class HashToMinMessage (val min: Long, val cc: Set[Long], val sizeBefore : Long) 7 | { 8 | def voteToHalt : Boolean = sizeBefore == cc.size 9 | } -------------------------------------------------------------------------------- /src/alternatingOptimized/AlternatingMessage.scala: -------------------------------------------------------------------------------- 1 | package alternatingOptimized 2 | 3 | @serializable 4 | class AlternatingMessage (val root : Boolean) 5 | { 6 | val isMarkedAsRootNode = root 7 | } 8 | 9 | object AlternatingMessage 10 | { 11 | val empty = new AlternatingMessage(false) 12 | } -------------------------------------------------------------------------------- /run/config_example: -------------------------------------------------------------------------------- 1 | 2 | dataset run/dataset_example 3 | outputFile run/output 4 | printAll true 5 | 6 | edgelistSeparator \t 7 | 8 | jarPath cracker-0.0.1-SNAPSHOT.jar 9 | 10 | sparkPartition 2 11 | #sparkMaster spark://:7077 12 | sparkMaster local[2] 13 | printMessageStat false 14 | -------------------------------------------------------------------------------- /src/cracker/CrackerMessageRedPhase.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | class CrackerTreeMessageRedPhase (val first : Option[CrackerTreeMessageIdentification], val second : Option[CrackerTreeMessageTree]) extends CrackerMessageSize 5 | { 6 | def getMessageSize = first.getOrElse(CrackerTreeMessageIdentification.empty).getMessageSize + second.getOrElse(CrackerTreeMessageTree.empty).getMessageSize 7 | } 8 | 9 | object CrackerTreeMessageRedPhase 10 | { 11 | def apply(first : CrackerTreeMessageIdentification) = new CrackerTreeMessageRedPhase(Option.apply(first), Option.empty) 12 | def apply(second : CrackerTreeMessageTree) = new CrackerTreeMessageRedPhase(Option.empty, Option.apply(second)) 13 | } -------------------------------------------------------------------------------- /src/cracker/CrackerMessageIdentification.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | class CrackerTreeMessageIdentification (val min: Long, val neigh: Set[Long]) extends CrackerMessageSize 5 | { 6 | def voteToHalt = neigh.isEmpty 7 | 8 | def getMessageSize = neigh.size + 1 9 | 10 | def merge(other : Option[CrackerTreeMessageIdentification]) : Option[CrackerTreeMessageIdentification] = 11 | { 12 | if(other.isDefined) 13 | { 14 | Option.apply(new CrackerTreeMessageIdentification(Math.min(min, other.get.min), neigh ++ other.get.neigh)) 15 | } else 16 | { 17 | Option.apply(CrackerTreeMessageIdentification.this) 18 | } 19 | } 20 | 21 | override def toString = neigh.toString 22 | } 23 | 24 | object CrackerTreeMessageIdentification 25 | { 26 | def empty = new CrackerTreeMessageIdentification(-1, Set()) 27 | } 
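A small, hedged illustration of the merge semantics defined above (the vertex ids are made up, not taken from the original sources): merging two CrackerTreeMessageIdentification values keeps the smaller minimum label and the union of the two neighbour sets, so getMessageSize grows with the merged neighbourhood.

    // hedged sketch of CrackerTreeMessageIdentification.merge (inside package cracker)
    val a = new CrackerTreeMessageIdentification(1L, Set(2L, 3L))
    val b = new CrackerTreeMessageIdentification(3L, Set(3L, 5L))
    val merged = a.merge(Option(b)).get
    // merged.min == 1, merged.neigh == Set(2, 3, 5), merged.getMessageSize == 4
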
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | http://opensource.org/licenses/mit-license.php 3 | 4 | Copyright (c) 2015 Thibault Debatty 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | http://opensource.org/licenses/mit-license.php 3 | 4 | Copyright (c) 2015 Thibault Debatty 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /src/cracker/CrackerMessageTree.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | class CrackerTreeMessageTree (val parent : Long, val child : Set[Long]) extends CrackerMessageSize 5 | { 6 | def getMessageSize = child.size + 1 7 | 8 | def merge(other : Option[CrackerTreeMessageTree]) : Option[CrackerTreeMessageTree] = 9 | { 10 | if(other.isDefined) 11 | { 12 | var parentNew = parent 13 | 14 | if(parentNew == -1) 15 | { 16 | parentNew = other.get.parent 17 | } 18 | 19 | Option.apply(new CrackerTreeMessageTree(parentNew, child ++ other.get.child)) 20 | } else 21 | { 22 | Option.apply(CrackerTreeMessageTree.this) 23 | } 24 | } 25 | 26 | def merge(other : CrackerTreeMessageTree) : CrackerTreeMessageTree = 27 | { 28 | var parentNew = parent 29 | 30 | if(parentNew == -1) 31 | { 32 | parentNew = other.parent 33 | } 34 | 35 | new CrackerTreeMessageTree(parentNew, child ++ other.child) 36 | } 37 | 38 | def getMessagePropagation(id : Long) = 39 | { 40 | if(parent == -1) 41 | { 42 | new CrackerTreeMessagePropagation(id, child) 43 | } else 44 | { 45 | new CrackerTreeMessagePropagation(-1, child) 46 | } 47 | } 48 | } 49 | 50 | object CrackerTreeMessageTree 51 | { 52 | def empty = new CrackerTreeMessageTree(-1, Set()) 53 | } -------------------------------------------------------------------------------- /src/util/Main.java: -------------------------------------------------------------------------------- 1 | package util; 2 | 3 | 4 | public class Main 5 | { 6 | public static void main(String[] args_) 7 | { 8 | if(args_.length > 1) 9 | { 10 | String algorithmName = args_[0]; 11 | String[] argsParsed = new String[args_.length - 1]; 12 | 13 | System.arraycopy( args_, 1, argsParsed, 0, args_.length - 1 ); 14 | 15 | switch(algorithmName) 16 | { 17 | case "CRACKER" : 18 | { 19 | // cracker.CrackerMainJava.main(argsParsed); 20 | crackerAllOptimizations.CrackerMainJava.main(argsParsed); 21 | break; 22 | } 23 | case "CRACKERALL" : 24 | { 25 | crackerAllOptimizations.CrackerMainJava.main(argsParsed); 26 | break; 27 | } 28 | case "CCF" : 29 | { 30 | ccf.CcfMainJava.main(argsParsed); 31 | break; 32 | } 33 | case "CCMR" : 34 | { 35 | ccmr.CcmrMainJava.main(argsParsed); 36 | break; 37 | } 38 | case "PEGASUS" : 39 | { 40 | hashMin.HashMinMainJava.main(argsParsed); 41 | break; 42 | } 43 | case "HASHTOMIN" : 44 | { 45 | hashToMin.HashToMinMainJava.main(argsParsed); 46 | break; 47 | } 48 | case "ALTERNATINGOPTIMIZED" : 49 | { 50 | alternatingOptimized.AlternatingOptimizedMainJava.main(argsParsed); 51 | break; 52 | } 53 | default : 54 | { 55 | System.out.println("ERROR: Algorithm name not recognized"); 56 | break; 57 | } 58 | } 59 | 60 | } else 61 | { 62 | System.out.println("ERROR Command input must be: command algorithmName configFile"); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/alternating/AlternatingStats.scala: -------------------------------------------------------------------------------- 1 | package alternating 2 | 3 | import util.CCPropertiesImmutable 4 | import org.apache.spark.SparkContext._ 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.rdd.RDD 7 | import util.CCUtil 8 | import cracker.CrackerStats 9 | 10 | @serializable 11 | class AlternatingStats(property : CCPropertiesImmutable, util : CCUtil, spark : SparkContext) { 12 | 13 | // val crackerStats = 
new CrackerStats(property, util, spark) 14 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 15 | val reduceInputSizeAccumulator = spark.accumulator(0L) 16 | 17 | def printSimplificationAlternating(step : Int, rdd : RDD[(Long, Set[Long])]) = 18 | { 19 | if (property.printMessageStat) 20 | { 21 | util.printSimplification(step, rdd.count, rdd.map(t=>t._2.size.toLong).reduce{case(a,b)=>a+b}, rdd.map(t=>t._2.size).max) 22 | } 23 | // if(property.printAll) 24 | // { 25 | // printGraph(util, step, "INPUT_BLUE", rdd) 26 | // } 27 | } 28 | 29 | def countMessage(ret : RDD[(Long, Set[Long])], step : Int) = 30 | { 31 | if (property.printMessageStat) { 32 | val previousMessageSize = reduceInputSizeAccumulator.value 33 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 34 | 35 | ret.foreach(t => reduceInputSizeAccumulator += t._2.size + 1) 36 | reduceInputMessageNumberAccumulator += ret.count 37 | 38 | util.printMessageStep(step, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 39 | } 40 | } 41 | } -------------------------------------------------------------------------------- /src/util/CCPropertiesImmutable.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | @serializable 4 | class CCPropertiesImmutable(algorithmNameFromConfig : String, 5 | val dataset : String, 6 | val dataset2 : String, 7 | val outputFile : String, 8 | val outputFileCC : String, 9 | val jarPath : String, 10 | val sparkMaster : String, 11 | val sparkPartition : Int, 12 | val sparkExecutorMemory : String, 13 | val sparkBlockManagerSlaveTimeoutMs : String, 14 | val sparkCoresMax : Int, 15 | val sparkShuffleManager : String, 16 | val sparkCompressionCodec : String, 17 | val sparkShuffleConsolidateFiles : String, 18 | val sparkAkkaFrameSize : String, 19 | val sparkDriverMaxResultSize : String, 20 | val sparkExecutorInstances : Int, 21 | val separator : String, 22 | val separatorCC : String, 23 | val printMessageStat : Boolean, 24 | val printLargestCC : Boolean, 25 | val printCC : Boolean, 26 | val printCCDistribution : Boolean, 27 | val printAll : Boolean, 28 | val customColumnValue : String, 29 | val switchLocal : Int, 30 | val switchLocalActive : Boolean, 31 | val vertexIdMultiplier : Int, 32 | val vertexNumber : Int, 33 | val loadBalancing : Boolean, 34 | val selfFunction : String, 35 | val cadidateFunction : String, 36 | val selfStar : Boolean, 37 | val transmitPreviousNeighbours : Boolean, 38 | val edgeThreshold : Double, 39 | val coreThreshold : Int, 40 | val invert : Boolean) extends Serializable 41 | { 42 | val algorithmName = if(loadBalancing) algorithmNameFromConfig+"_LOAD" else algorithmNameFromConfig 43 | val appName = algorithmName+"_"+dataset 44 | val allStat = printMessageStat && appName.contains("CRA") 45 | val filenameLargestCC = dataset+"_largestCC" 46 | } -------------------------------------------------------------------------------- /src/cracker/CrackerStats.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | import org.apache.spark.SparkContext._ 4 | import org.apache.spark.SparkContext 5 | import org.apache.spark.rdd.RDD 6 | import util.CCPropertiesImmutable 7 | import util.CCUtil 8 | 9 | @serializable 10 | class CrackerStats(property: CCPropertiesImmutable, util: CCUtil, spark: SparkContext) { 11 | 12 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 13 | val 
reduceInputSizeAccumulator = spark.accumulator(0L) 14 | 15 | def printSimplification(step: Int, rdd: RDD[(Long, CrackerTreeMessageIdentification)]) = { 16 | if (property.printMessageStat) { 17 | if (rdd.count > 0) 18 | util.printSimplification(step, rdd.count, rdd.map(t => t._2.neigh.size.toLong).sum, rdd.map(t => t._2.neigh.size).max) 19 | else 20 | util.printSimplification(step, 0, 0, 0) 21 | } 22 | } 23 | 24 | def printSimplificationCCF(step: Int, rdd: RDD[(Long, Iterable[Long])]) = { 25 | if (property.printMessageStat) { 26 | val count = rdd.count 27 | if (count > 0) 28 | util.printSimplification(step, count, rdd.map(t => t._2.size.toLong).sum, rdd.map(t => t._2.size).max) 29 | else 30 | util.printSimplification(step, 0, 0, 0) 31 | } 32 | // if(property.printAll) 33 | // { 34 | // printGraph(util, step, "INPUT_BLUE", rdd) 35 | // } 36 | } 37 | 38 | def printMessageStats[A <% CrackerMessageSize](step: Int, rdd: RDD[(Long, A)]) = { 39 | if (property.printMessageStat) { 40 | val previousMessageSize = reduceInputSizeAccumulator.value 41 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 42 | 43 | rdd.foreach(t => reduceInputSizeAccumulator += t._2.getMessageSize) 44 | reduceInputMessageNumberAccumulator += rdd.count 45 | 46 | util.printMessageStep(step, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 47 | } 48 | } 49 | 50 | def printGraph(util: CCUtil, step: Int, description: String, g: RDD[(Long, CrackerTreeMessageIdentification)]) = { 51 | util.io.printToFile("graph.txt", "STEP " + step + "\t[" + description + "]\t" + g.map(t => "{" + t._1 + " " + t._2.toString + "} ").reduce { case (a, b) => a + b } + "\n") 52 | } 53 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Cracker 2 | ======= 3 | 4 | Crumbling large graphs into connected components 5 | 6 | Abstract—Finding connected components is a fundamental task in applications dealing with graph analytics, such as social network 7 | analysis, web graph mining and image processing. The exponentially growing size of today’s graphs has required the definition of new 8 | computational models and algorithms for their efficient processing on highly distributed architectures. In this paper we present 9 | CRACKER, an efficient iterative MapReduce-like algorithm to detect connected components in large graphs. The strategy of CRACKER 10 | is to transform the input graph in a set of trees, one for each connected component in the graph. Nodes are iteratively removed from 11 | the graph and added to the trees, reducing the amount of computation at each iteration. We prove the correctness of the algorithm, 12 | evaluate its computational cost and provide an extensive experimental evaluation considering a wide variety of synthetic and real-world 13 | graphs. The experimental results show that CRACKER consistently outperforms state-of-the-art approaches both in terms of total 14 | computation time and volume of messages exchanged. 15 | 16 | 17 | ### Publications 18 | 19 | **2016 - IEEE Transaction on Parallel and Distributed Systems** 20 | 21 | Lulli, Alessandro, et al. 22 | **Fast Connected Components Computation in Large Graphs by Vertex Pruning.** 23 | IEEE Transactions on parallel and distributed systems (2016) (to appear). 
24 | 25 | @article{lulli2016fast, 26 | title={Fast Connected Components Computation in Large Graphs by Vertex Pruning}, 27 | author={Lulli, Alessandro and Carlini, Emanuele and Dazzi, Patrizio and Lucchese, Claudio and Ricci, Laura}, 28 | journal={IEEE Transactions on parallel and distributed systems}, 29 | year={2016}, 30 | publisher={IEEE} 31 | } 32 | 33 | **2015 - IEEE Symposium on Computers and Communication (ISCC)** 34 | 35 | Lulli, Alessandro, et al. 36 | **Cracker: Crumbling large graphs into connected components.** 37 | 2015 IEEE Symposium on Computers and Communication (ISCC). IEEE, 2015. 38 | 39 | @inproceedings{lulli2015cracker, 40 | title={Cracker: Crumbling large graphs into connected components}, 41 | author={Lulli, Alessandro and Ricci, Laura and Carlini, Emanuele and Dazzi, Patrizio and Lucchese, Claudio}, 42 | booktitle={2015 IEEE Symposium on Computers and Communication (ISCC)}, 43 | pages={574--581}, 44 | year={2015}, 45 | organization={IEEE} 46 | } 47 | 48 | ### How to build 49 | 50 | mvn clean package 51 | 52 | ### How to run 53 | 54 | spark-submit --class util.Main --executor-cores <#core> --driver-memory <#memory>g --master spark://:7077 target/cracker-0.0.1-SNAPSHOT.jar CRACKER config_example 55 | 56 | -------------------------------------------------------------------------------- /src/alternating/AlternatingMain.scala: -------------------------------------------------------------------------------- 1 | package alternating 2 | 3 | import java.io.FileWriter 4 | import scala.collection.immutable.TreeSet 5 | import scala.collection.mutable.ListBuffer 6 | import org.apache.spark.Accumulator 7 | import org.apache.spark.SparkContext._ 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.rdd.RDD 10 | import util.CCUtil 11 | import util.CCProperties 12 | import cracker.CrackerStats 13 | 14 | object AlternatingMain { 15 | 16 | 17 | def main(args : Array[String]) : Unit = 18 | { 19 | val timeBegin = System.currentTimeMillis() 20 | 21 | val property = new CCProperties("ALTERNATING", args(0)).load.getImmutable 22 | 23 | val util = new CCUtil(property) 24 | 25 | val spark = util.getSparkContext() 26 | val alternating = new AlternatingAlgorithm 27 | val stats = new AlternatingStats(property, util, spark) 28 | 29 | val timeSparkLoaded = System.currentTimeMillis() 30 | val file = spark.textFile( property.dataset , property.sparkPartition) 31 | 32 | util.io.printFileStart(property.appName) 33 | 34 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 35 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 36 | 37 | var ret = fusedData.flatMap(alternating.generateInitialEdge).reduceByKey(alternating.reduceMessageByKey).cache //.map( item => ( item._1, new CcfMessage( toTreeSet(item._2.toSet), false) ) ) 38 | ret.count 39 | 40 | val timeDataLoaded = System.currentTimeMillis() 41 | 42 | var control = false; 43 | var step = 0 44 | 45 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 46 | val reduceInputSizeAccumulator = spark.accumulator(0L) 47 | 48 | var previousRDDForConvergence = ret.map(t => (t._1, Math.min(t._2.min, t._1))).cache 49 | previousRDDForConvergence.count 50 | 51 | while (!control) { 52 | val timeStepStart = System.currentTimeMillis() 53 | 54 | stats.printSimplificationAlternating(step, ret) 55 | ret = ret.flatMap(item => alternating.largeStarMap(item)) 56 | 57 | stats.countMessage(ret, step) 58 | 59 | ret = ret.reduceByKey(alternating.reduceMessageByKey).flatMap(alternating.largeStarReduce) 60 | 61 | 
stats.countMessage(ret, step) 62 | 63 | ret = ret.reduceByKey(alternating.reduceMessageByKey).cache 64 | 65 | val timeStepLarge = System.currentTimeMillis() 66 | util.io.printTime(timeStepStart, timeStepLarge, "large") 67 | util.printTimeStep(step, timeStepLarge-timeStepStart) 68 | stats.printSimplificationAlternating(step+1, ret) 69 | 70 | ret = ret.flatMap(alternating.smallStarReduce) 71 | 72 | stats.countMessage(ret, step) 73 | 74 | ret = ret.reduceByKey(alternating.reduceMessageByKey).cache 75 | 76 | val rddForConvergence = ret.map(t => (t._1, Math.min(t._2.min, t._1))).cache 77 | control = previousRDDForConvergence.leftOuterJoin(rddForConvergence).map(t => if(t._2._2.isDefined) t._2._1 == t._2._2.get else false).cache.reduce{case(a,b) => a&&b} 78 | previousRDDForConvergence = rddForConvergence 79 | 80 | val timeStepSmall = System.currentTimeMillis() 81 | 82 | step = step + 3 83 | util.io.printTime(timeStepLarge, timeStepSmall, "small") 84 | util.printTimeStep(step+1, timeStepSmall-timeStepLarge) 85 | } 86 | 87 | val timeEnd = System.currentTimeMillis() 88 | 89 | util.testEnded( ret.map(t=> (t._2.min, 1)).reduceByKey{case (a,b)=> a+b}.map(t=>(t._1, t._2)), 90 | step, 91 | timeBegin, 92 | timeEnd, 93 | timeSparkLoaded, 94 | timeDataLoaded, 95 | reduceInputMessageNumberAccumulator.value, 96 | reduceInputSizeAccumulator.value) 97 | 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/hashToMin/HashToMinMain.scala: -------------------------------------------------------------------------------- 1 | package hashToMin 2 | 3 | import java.io.FileWriter 4 | import scala.collection.mutable.ListBuffer 5 | import org.apache.spark.SparkContext._ 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.rdd.RDD 8 | import util.CCUtil 9 | import util.CCProperties 10 | import util.CCProperties 11 | 12 | object HashToMinMain { 13 | def emitBlue(item: (Long, HashToMinMessage)): Iterable[(Long, HashToMinMessage)] = { 14 | var outputList: ListBuffer[(Long, HashToMinMessage)] = new ListBuffer 15 | 16 | val min = item._2.min 17 | 18 | val it = item._2.cc.iterator 19 | 20 | if (min == item._1) { 21 | outputList.prepend((item._1, new HashToMinMessage(min, item._2.cc, item._2.cc.size))) 22 | } else { 23 | outputList.prepend((item._1, new HashToMinMessage(min, Set(min), item._2.cc.size))) 24 | } 25 | 26 | while (it.hasNext) { 27 | val next = it.next 28 | 29 | if (next != item._1) { 30 | if (next == min) { 31 | outputList.prepend((next, new HashToMinMessage(min, item._2.cc, -1))) 32 | } else { 33 | outputList.prepend((next, new HashToMinMessage(min, Set(min), -1))) 34 | } 35 | } 36 | } 37 | 38 | outputList.toIterable 39 | } 40 | 41 | def reduceBlue(item1: HashToMinMessage, item2: HashToMinMessage): HashToMinMessage = { 42 | val ret = item1.cc ++ item2.cc 43 | val min = Math.min(item1.min, item2.min) 44 | var size = item1.sizeBefore 45 | if (size == -1) size = item2.sizeBefore 46 | new HashToMinMessage(min, ret, size) 47 | } 48 | 49 | def main(args: Array[String]): Unit = { 50 | val timeBegin = System.currentTimeMillis() 51 | 52 | val property = new CCProperties("HASHTOMIN", args(0)).load.getImmutable 53 | 54 | val util = new CCUtil(property) 55 | val spark = util.getSparkContext() 56 | 57 | val timeSparkLoaded = System.currentTimeMillis() 58 | val file = spark.textFile(property.dataset, property.sparkPartition) 59 | 60 | util.io.printFileStart(property.appName) 61 | 62 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 63 | val 
(parsedData, fusedData) = util.loadEdgeFromFile(file) 64 | 65 | var ret = fusedData.map(item => (item._1, new HashToMinMessage(item._2.toSet.min, item._2.toSet, -1))) 66 | 67 | val timeDataLoaded = System.currentTimeMillis() 68 | 69 | var control = false; 70 | var step = 0 71 | 72 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 73 | val reduceInputSizeAccumulator = spark.accumulator(0L) 74 | 75 | while (!control) { 76 | val timeStepStart = System.currentTimeMillis() 77 | 78 | val previous = ret 79 | val retMap = ret.flatMap(item => emitBlue(item)) 80 | 81 | if (property.printMessageStat) { 82 | val previousMessageSize = reduceInputSizeAccumulator.value 83 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 84 | 85 | retMap.foreach(t => reduceInputSizeAccumulator += t._2.cc.size + 2) 86 | reduceInputMessageNumberAccumulator += retMap.count 87 | 88 | util.printMessageStep(step + 1, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 89 | } 90 | 91 | ret = retMap.reduceByKey(reduceBlue).cache 92 | ret.foreach(x => {}) 93 | 94 | val controlMap = ret.map(t => t._2.voteToHalt) 95 | control = controlMap.reduce { case (a, b) => a && b } 96 | // try 97 | // { 98 | // control = controlMap.reduce{case (a,b) => a && b} 99 | // } 100 | // catch 101 | // { 102 | // case e : Exception => control = false 103 | // } 104 | 105 | val timeStepBlue = System.currentTimeMillis() 106 | 107 | step = step + 1 108 | util.io.printTime(timeStepStart, timeStepBlue, "blue") 109 | util.printTimeStep(step, timeStepBlue - timeStepStart) 110 | 111 | ret.checkpoint 112 | previous.unpersist(false) 113 | retMap.unpersist(false) 114 | controlMap.unpersist(false) 115 | } 116 | 117 | val timeEnd = System.currentTimeMillis() 118 | 119 | util.testEnded(ret.map(t => (t._2.min, t._2.cc.size)).reduceByKey { case (a, b) => Math.max(a, b) }, 120 | step, 121 | timeBegin, 122 | timeEnd, 123 | timeSparkLoaded, 124 | timeDataLoaded, 125 | reduceInputMessageNumberAccumulator.value, 126 | reduceInputSizeAccumulator.value) 127 | } 128 | 129 | } -------------------------------------------------------------------------------- /src/hashMin/HashMinMain.scala: -------------------------------------------------------------------------------- 1 | package hashMin 2 | 3 | import java.io.FileWriter 4 | import scala.collection.mutable.ListBuffer 5 | import org.apache.spark.SparkContext._ 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.rdd.RDD 8 | import util.CCUtil 9 | import util.CCProperties 10 | 11 | object HashMinMain 12 | { 13 | def emitBlue( item : ( Long, HashMinMessage ) ) : Iterable[( Long, HashMinMessage )] = 14 | { 15 | var outputList : ListBuffer[( Long, HashMinMessage )] = new ListBuffer 16 | 17 | val min = item._2.min 18 | 19 | val it = item._2.neigh.iterator 20 | 21 | outputList.prepend( ( item._1, new HashMinMessage( min, min, item._2.neigh) ) ) 22 | 23 | while(it.hasNext) 24 | { 25 | val next = it.next 26 | 27 | if(next != item._1) 28 | { 29 | outputList.prepend( ( next, new HashMinMessage( min, -1 , Set()) ) ) 30 | } 31 | } 32 | 33 | outputList.toIterable 34 | } 35 | 36 | def reduceBlue( item1 : HashMinMessage, item2 : HashMinMessage ) : HashMinMessage = 37 | { 38 | val ret = item1.neigh ++ item2.neigh 39 | val min = Math.min( item1.min, item2.min ) 40 | var minBefore = item1.minBefore 41 | if(minBefore == -1) minBefore = item2.minBefore 42 | new HashMinMessage( min, minBefore , ret ) 43 | } 44 | 45 | def 
main( args : Array[String] ) : Unit = 46 | { 47 | val timeBegin = System.currentTimeMillis() 48 | 49 | val property = new CCProperties("PEGASUS", args(0)).load.getImmutable 50 | 51 | val util = new CCUtil(property) 52 | val spark = util.getSparkContext() 53 | 54 | val timeSparkLoaded = System.currentTimeMillis() 55 | val file = spark.textFile( property.dataset , property.sparkPartition) 56 | 57 | util.io.printFileStart(property.appName) 58 | 59 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 60 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 61 | var ret = fusedData.map( item => ( item._1, new HashMinMessage( item._2.toSet.min, -1, item._2.toSet) ) ) 62 | 63 | val timeDataLoaded = System.currentTimeMillis() 64 | 65 | var control = false; 66 | var step = 0 67 | 68 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 69 | val reduceInputSizeAccumulator = spark.accumulator(0L) 70 | 71 | while ( !control ) { 72 | val timeStepStart = System.currentTimeMillis() 73 | 74 | val previous = ret 75 | val mapResult = ret.flatMap( item => emitBlue( item ) ) 76 | 77 | if(property.printMessageStat) 78 | { 79 | val previousMessageSize = reduceInputSizeAccumulator.value 80 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 81 | 82 | mapResult.foreach(t => reduceInputSizeAccumulator += t._2.neigh.size + 2) 83 | reduceInputMessageNumberAccumulator += mapResult.count 84 | 85 | util.printMessageStep(step + 1, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 86 | } 87 | 88 | ret = mapResult.reduceByKey( reduceBlue ).cache 89 | 90 | val controlMap = ret.map(t => t._2.voteToHalt) 91 | // val check = controlMap.filter(t=> (!t)).count 92 | // util.io.printStat(check, "active") 93 | control = controlMap.reduce{case (a,b) => a && b} 94 | 95 | val timeStepBlue = System.currentTimeMillis() 96 | 97 | step = step + 1 98 | util.io.printTime( timeStepStart, timeStepBlue, "blue" ) 99 | util.printTimeStep(step, timeStepBlue-timeStepStart) 100 | 101 | ret.checkpoint 102 | mapResult.unpersist(false) 103 | previous.unpersist(false) 104 | controlMap.unpersist(false) 105 | } 106 | 107 | val timeEnd = System.currentTimeMillis() 108 | 109 | util.testEnded( ret.map(t=> (t._2.min, 1)).reduceByKey{case (a,b)=> a+b}, 110 | step, 111 | timeBegin, 112 | timeEnd, 113 | timeSparkLoaded, 114 | timeDataLoaded, 115 | reduceInputMessageNumberAccumulator.value, 116 | reduceInputSizeAccumulator.value) 117 | } 118 | 119 | } -------------------------------------------------------------------------------- /src/util/CCProperties.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.util.Properties 4 | import java.io.InputStream 5 | import java.io.FileInputStream 6 | 7 | 8 | class CCProperties(algorithmName: String, configurationFile : String) extends Serializable 9 | { 10 | val property = new Properties 11 | 12 | def load() : CCProperties = 13 | { 14 | var input : InputStream = null 15 | 16 | input = new FileInputStream(configurationFile); 17 | 18 | property.load(input); 19 | 20 | this 21 | } 22 | 23 | def get(data : String, default : String) = 24 | { 25 | property.getProperty(data, default) 26 | } 27 | 28 | def getBoolean(data : String, default : Boolean) = 29 | { 30 | get(data, default.toString).toBoolean 31 | } 32 | 33 | def getInt(data : String, default : Int) = 34 | { 35 | get(data, default.toString).toInt 36 | } 37 | 38 | def getDouble(data 
: String, default : Double) = 39 | { 40 | get(data, default.toString).toDouble 41 | } 42 | 43 | def getImmutable : CCPropertiesImmutable = 44 | { 45 | val dataset = get("dataset", "") 46 | val dataset2 = get("dataset2", "") 47 | val jarPath = get("jarPath", "") 48 | val sparkMaster = get("sparkMaster", "local[2]") 49 | val sparkExecutorMemory = get("sparkExecutorMemory", "14g") 50 | val sparkPartition = get("sparkPartition", "32").toInt 51 | val sparkBlockManagerSlaveTimeoutMs= get("sparkBlockManagerSlaveTimeoutMs", "45000") 52 | val sparkCoresMax = get("sparkCoresMax", "-1").toInt 53 | val sparkAkkaFrameSize = get("sparkAkkaFrameSize", "100").toString 54 | val sparkShuffleManager = get("sparkShuffleManager", "SORT").toString 55 | val sparkCompressionCodec = get("sparkCompressionCodec", "snappy").toString 56 | val sparkShuffleConsolidateFiles = get("sparkShuffleConsolidateFiles", "false").toString 57 | val sparkDriverMaxResultSize = get("sparkDriverMaxResultSize", "1g").toString 58 | var separator = get("edgelistSeparator", "space") 59 | var separatorCC = get("edgelistSeparatorCC", "space") 60 | if(separator.equals("space")) separator = " " 61 | if(separatorCC.equals("space")) separatorCC = " " 62 | val printMessageStat = get("printMessageStat", "false").toBoolean 63 | val printLargestCC = get("printLargestCC", "false").toBoolean 64 | val printCC = get("printCC", "false").toBoolean 65 | val printCCDistribution = get("printCCDistribution", "false").toBoolean 66 | val printAll = get("printAll", "false").toBoolean 67 | val customColumnValue = get("customColumnValue", "") 68 | val algorithmNameFromConfiguration = get("algorithmName", algorithmName) 69 | val switchLocal = get("switchLocal", "0").toInt 70 | val switchLocalActive = switchLocal != -1 71 | val vertexIdMultiplier = get("vertexIdMultiplier", "-1").toInt 72 | val loadBalancing = get("loadBalancing", "false").toBoolean 73 | val vertexNumber = get("vertexNumber", "-1").toInt 74 | val outputFile = get("outputFile", "") 75 | val outputFileCC = get("outputFileCC", "") 76 | val coreThreshold = getInt("coreThreshold", 10) 77 | val invert = get("invert", "false").toBoolean 78 | 79 | //############# WITH YARN 80 | val sparkExecutorInstances = get("sparkExecutorInstances", "-1").toInt 81 | 82 | //################## DIAMETER 83 | val selfFunction = get("selfFunction", "DISTANCE_MAX") 84 | 85 | 86 | val candidateFunction = get("candidateFunction", "MAX") 87 | 88 | val selfStar = get("selfStar", "true").toBoolean 89 | val transmitPreviousNeighbours = get("transmitPreviousNeighbours", "true").toBoolean 90 | val edgeThreshold = getDouble("edgeThreshold", -1) 91 | 92 | new CCPropertiesImmutable( algorithmNameFromConfiguration, 93 | dataset, 94 | dataset2, 95 | outputFile, 96 | outputFileCC, 97 | jarPath, 98 | sparkMaster, 99 | sparkPartition, 100 | sparkExecutorMemory, 101 | sparkBlockManagerSlaveTimeoutMs, 102 | sparkCoresMax, 103 | sparkShuffleManager, 104 | sparkCompressionCodec, 105 | sparkShuffleConsolidateFiles, 106 | sparkAkkaFrameSize, 107 | sparkDriverMaxResultSize, 108 | sparkExecutorInstances, 109 | separator, 110 | separatorCC, 111 | printMessageStat, 112 | printLargestCC, 113 | printCC, 114 | printCCDistribution, 115 | printAll, 116 | customColumnValue, 117 | switchLocal, 118 | switchLocalActive, 119 | vertexIdMultiplier, 120 | vertexNumber, 121 | loadBalancing, 122 | selfFunction, 123 | candidateFunction, 124 | selfStar, 125 | transmitPreviousNeighbours, 126 | edgeThreshold, 127 | coreThreshold, 128 | invert) 129 | } 130 | } 
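For reference, this is how the loader above ties together with the run/config_example file shown earlier (a hedged sketch; the property names come from that config file and the remaining fields fall back to the defaults in getImmutable):

    // hedged sketch: load run/config_example exactly as the *Main objects do
    import util.CCProperties
    val property = new CCProperties("CRACKER", "run/config_example").load.getImmutable
    // property.dataset          == "run/dataset_example"
    // property.sparkMaster      == "local[2]"
    // property.sparkPartition   == 2
    // property.printMessageStat == false
    // property.appName          == "CRACKER_run/dataset_example"
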
-------------------------------------------------------------------------------- /src/ccf/CcfMain.scala: -------------------------------------------------------------------------------- 1 | package ccf 2 | 3 | import java.io.FileWriter 4 | import scala.collection.immutable.TreeSet 5 | import scala.collection.mutable.ListBuffer 6 | import org.apache.spark.Accumulator 7 | import org.apache.spark.SparkContext._ 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.rdd.RDD 10 | import util.CCUtil 11 | import util.CCProperties 12 | 13 | // TODO fail to recognize component of size = 1 14 | object CcfMain { 15 | def ccfIterateMap(item : (Long, Long)) : Iterable[(Long, Long)] = 16 | { 17 | var outputList : ListBuffer[(Long, Long)] = new ListBuffer 18 | 19 | outputList.prepend((item._1, item._2)) 20 | outputList.prepend((item._2, item._1)) 21 | 22 | outputList.toIterable 23 | } 24 | 25 | def ccfIterateReduce(item : (Long, Iterable[Long])) : Iterable[(Long, Long)] = 26 | { 27 | var terminate = true 28 | var outputList : ListBuffer[(Long, Long)] = new ListBuffer 29 | 30 | var min = item._1 31 | val it = item._2.iterator 32 | var valueList : List[Long] = List() 33 | 34 | while (it.hasNext) { 35 | val next = it.next 36 | valueList = next :: valueList 37 | if (next < min) { 38 | min = next 39 | } 40 | } 41 | 42 | if (min < item._1) { 43 | outputList.prepend((item._1, min)) 44 | val it2 = valueList.iterator 45 | while (it2.hasNext) { 46 | val next = it2.next 47 | if (min != next) { 48 | outputList.prepend((next, min)) 49 | terminate = false 50 | } 51 | } 52 | } 53 | 54 | if (!terminate) { 55 | // ack! ugly! 56 | outputList.prepend((-1, min)) 57 | } 58 | 59 | outputList.toIterable 60 | } 61 | 62 | def ccfDedupMap(item : (Long, Long)) : ((Long, Long), Long) = 63 | { 64 | ((item._1, item._2), -1) 65 | } 66 | 67 | def ccfDedupReduce(item : ((Long, Long), Iterable[Long])) : (Long, Long) = 68 | { 69 | (item._1._1, item._1._2) 70 | } 71 | 72 | def reduceBlue(item1 : CcfMessage, item2 : CcfMessage) : CcfMessage = 73 | { 74 | new CcfMessage(item1.cc ++ item2.cc, item1.terminate || item2.terminate) 75 | } 76 | 77 | def main(args : Array[String]) : Unit = 78 | { 79 | val timeBegin = System.currentTimeMillis() 80 | 81 | val property = new CCProperties("CCF", args(0)).load.getImmutable 82 | 83 | val util = new CCUtil(property) 84 | 85 | val spark = util.getSparkContext() 86 | 87 | val timeSparkLoaded = System.currentTimeMillis() 88 | val file = spark.textFile( property.dataset , property.sparkPartition) 89 | 90 | util.io.printFileStart(property.appName) 91 | 92 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 93 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 94 | 95 | var ret = parsedData //.map( item => ( item._1, new CcfMessage( toTreeSet(item._2.toSet), false) ) ) 96 | 97 | val timeDataLoaded = System.currentTimeMillis() 98 | 99 | var control = false; 100 | var step = 0 101 | 102 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 103 | val reduceInputSizeAccumulator = spark.accumulator(0L) 104 | 105 | while (!control) { 106 | val timeStepStart = System.currentTimeMillis() 107 | 108 | var tmp = ret.flatMap(item => ccfIterateMap(item)).groupByKey 109 | 110 | var previousMessageSize = 0L 111 | var previousMessageNumber = 0L 112 | 113 | if (property.printMessageStat) { 114 | previousMessageSize = reduceInputSizeAccumulator.value 115 | previousMessageNumber = reduceInputMessageNumberAccumulator.value 116 | 117 | tmp.foreach(t => reduceInputSizeAccumulator += 
t._2.size + 1) 118 | reduceInputMessageNumberAccumulator += tmp.count 119 | } 120 | 121 | ret = tmp.flatMap(ccfIterateReduce) 122 | 123 | control = ret.filter(t => t._1 == -1).count == 0 124 | 125 | ret = ret.filter(t => t._1 != -1) 126 | 127 | val tmp2 = ret.map(item => ccfDedupMap(item)).groupByKey 128 | 129 | if (property.printMessageStat) { 130 | tmp2.foreach(t => reduceInputSizeAccumulator += 3) 131 | reduceInputMessageNumberAccumulator += tmp2.count 132 | 133 | util.printMessageStep(step + 1, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 134 | } 135 | 136 | ret = tmp2.map(ccfDedupReduce) 137 | 138 | val timeStepBlue = System.currentTimeMillis() 139 | 140 | step = step + 1 141 | util.io.printTime(timeStepStart, timeStepBlue, "blue") 142 | util.printTimeStep(step, timeStepBlue-timeStepStart) 143 | } 144 | 145 | val timeEnd = System.currentTimeMillis() 146 | 147 | util.testEnded( ret.map(t=> (t._2, 1)).reduceByKey{case (a,b)=> a+b}.map(t=>(t._1, t._2 + 1)), 148 | step, 149 | timeBegin, 150 | timeEnd, 151 | timeSparkLoaded, 152 | timeDataLoaded, 153 | reduceInputMessageNumberAccumulator.value, 154 | reduceInputSizeAccumulator.value) 155 | 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/alternatingOptimized/AlternatingOptimizedMain.scala: -------------------------------------------------------------------------------- 1 | package alternatingOptimized 2 | 3 | import scala.collection.mutable.ListBuffer 4 | import org.apache.spark.Accumulator 5 | import org.apache.spark.SparkContext._ 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.rdd.RDD 8 | import cracker.CrackerStats 9 | import util.CCProperties 10 | import util.CCUtil 11 | import com.google.common.base.Joiner 12 | import java.io.FileWriter 13 | import alternating.AlternatingAlgorithm 14 | import alternating.AlternatingStats 15 | 16 | object AlternatingOptimizedMain { 17 | 18 | 19 | def main(args : Array[String]) : Unit = 20 | { 21 | val timeBegin = System.currentTimeMillis() 22 | 23 | val property = new CCProperties("ALTERNATINGOPTIMIZED", args(0)).load.getImmutable 24 | 25 | val util = new CCUtil(property) 26 | 27 | val spark = util.getSparkContext() 28 | val alternating = new AlternatingAlgorithm 29 | val stats = new AlternatingStats(property, util, spark) 30 | 31 | val timeSparkLoaded = System.currentTimeMillis() 32 | val file = spark.textFile( property.dataset , property.sparkPartition) 33 | 34 | util.io.printFileStart(property.appName) 35 | util.io.printStat(property.vertexIdMultiplier, "idMultiplier") 36 | 37 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 38 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 39 | 40 | // var ret = fusedData.flatMap(alternating.generateInitialEdge).reduceByKey(alternating.reduceMessageByKey).cache //.map( item => ( item._1, new CcfMessage( toTreeSet(item._2.toSet), false) ) ) 41 | var ret = fusedData.map(t => (t._1, t._2.toSet)).cache 42 | 43 | val timeDataLoaded = System.currentTimeMillis() 44 | ret.count 45 | 46 | var control = false; 47 | var step = 0 48 | 49 | var previousRDDForConvergence = ret.map(t => (t._1, Math.min(t._2.min, t._1))).cache 50 | previousRDDForConvergence.count 51 | 52 | while (!control) { 53 | val timeStepStart = System.currentTimeMillis() 54 | 55 | stats.printSimplificationAlternating(step, ret) 56 | var previousRet = ret 57 | ret = ret.flatMap(item => alternating.largeStarMapOptimized(item, 
property.vertexIdMultiplier)).cache 58 | 59 | ret.first() 60 | previousRet.unpersist() 61 | 62 | stats.countMessage(ret, step + 1) 63 | 64 | previousRet = ret 65 | ret = ret.reduceByKey(alternating.reduceMessageByKey).flatMap(item => alternating.largeStarReduceOptimized(item)) 66 | 67 | stats.countMessage(ret, step + 2) 68 | 69 | var previousRet2 = ret 70 | ret = ret.reduceByKey(alternating.reduceMessageByKey).cache 71 | 72 | ret.first() 73 | previousRet.unpersist() 74 | previousRet2.unpersist() 75 | 76 | val timeStepLarge = System.currentTimeMillis() 77 | util.io.printTime(timeStepStart, timeStepLarge, "large") 78 | util.printTimeStep(step, timeStepLarge-timeStepStart) 79 | stats.printSimplificationAlternating(step+1, ret) 80 | 81 | previousRet = ret 82 | ret = ret.flatMap(alternating.smallStarReduce) 83 | 84 | stats.countMessage(ret, step + 3) 85 | 86 | previousRet2 = ret 87 | ret = ret.reduceByKey(alternating.reduceMessageByKey).cache 88 | 89 | val rddForConvergence = ret.map(t => (t._1, Math.min(t._2.min, t._1))).cache 90 | control = rddForConvergence.leftOuterJoin(previousRDDForConvergence).map(t => if(t._2._2.isDefined) t._2._1 == t._2._2.get else false).cache.reduce{case(a,b) => a&&b} 91 | previousRDDForConvergence = rddForConvergence 92 | 93 | val timeStepSmall = System.currentTimeMillis() 94 | 95 | stats.printSimplificationAlternating(step + 3, ret) 96 | step = step + 3 97 | util.io.printTime(timeStepLarge, timeStepSmall, "small") 98 | util.printTimeStep(step+1, timeStepSmall-timeStepLarge) 99 | 100 | ret.first() 101 | previousRet.unpersist() 102 | previousRet2.unpersist() 103 | } 104 | 105 | val timeAdjustingAdditionalVertexForLoadBalancingStart = System.currentTimeMillis() 106 | 107 | val rddLabeled = ret.map(t=> (t._1, t._2.min)) 108 | val rddLabeledInverted = rddLabeled.map(t=> (t._2, t._1)) 109 | 110 | val resultJoin = rddLabeledInverted.leftOuterJoin(rddLabeled).map(t=>(t._2._1, t._2._2)).filter(t=>t._2.isDefined).map(t=>(t._1,t._2.get)) 111 | val result = rddLabeled.leftOuterJoin(resultJoin).map(t=> if(t._2._2.isDefined) (t._1, Math.min(t._2._1,t._2._2.get)) else (t._1, t._2._1)) 112 | 113 | val timeEnd = System.currentTimeMillis() 114 | util.io.printTime(timeAdjustingAdditionalVertexForLoadBalancingStart, timeEnd, "timeAdjustingAdditionalVertexForLoadBalancingStart") 115 | 116 | util.testEnded( 117 | result.filter(t => t._1%property.vertexIdMultiplier==0).groupByKey.map(t=> (t._2.min, 1)).reduceByKey{case (a,b)=> a+b}.map(t=>(t._1, t._2)), 118 | step, 119 | timeBegin, 120 | timeEnd, 121 | timeSparkLoaded, 122 | timeDataLoaded, 123 | stats.reduceInputMessageNumberAccumulator.value, 124 | stats.reduceInputSizeAccumulator.value) 125 | 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/alternating/AlternatingAlgorithm.scala: -------------------------------------------------------------------------------- 1 | package alternating 2 | 3 | import scala.collection.mutable.ListBuffer 4 | 5 | @serializable 6 | class AlternatingAlgorithm { 7 | def generateInitialEdge(item : (Long, Iterable[Long])) : Iterable[(Long, Set[Long])] = 8 | { 9 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 10 | 11 | val it = item._2.toSet.iterator 12 | while (it.hasNext) { 13 | val next = it.next 14 | outputList.prepend((item._1, Set(next))) 15 | } 16 | 17 | outputList.toIterable 18 | } 19 | 20 | def smallStarMap(item : (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 21 | { 22 | var outputList : ListBuffer[(Long, Set[Long])] = new 
ListBuffer 23 | 24 | val it2 = item._2.iterator 25 | while (it2.hasNext) { 26 | val next = it2.next 27 | if(next <= item._1) 28 | { 29 | outputList.prepend((item._1, Set(next))) 30 | } else 31 | { 32 | outputList.prepend((next, Set(item._1))) 33 | } 34 | } 35 | 36 | outputList.toIterable 37 | } 38 | 39 | def smallStarReduce(item : (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 40 | { 41 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 42 | 43 | var min = Math.min( item._1, item._2.min) 44 | val it2 = item._2.iterator 45 | // var valueList : Set[Long] = Set() 46 | // 47 | // while (it.hasNext) { 48 | // val next = it.next 49 | // valueList = valueList + next 50 | // if (next < min) { 51 | // min = next 52 | // } 53 | // } 54 | 55 | // val it2 = valueList.iterator 56 | while (it2.hasNext) { 57 | val next = it2.next 58 | outputList.prepend((next, Set(min))) 59 | } 60 | 61 | outputList.prepend((item._1, Set(min))) 62 | 63 | outputList.toIterable 64 | } 65 | 66 | def largeStarMapOptimized(item: (Long, Set[Long]), limit : Int) : Iterable[(Long, Set[Long])] = 67 | { 68 | val sizeNeighborhood = item._2.size 69 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 70 | 71 | // if(info.isDefined && info.get.isMarkedAsRootNode) 72 | // { 73 | // outputList.prepend((Option(item._2, item._1), Option.empty)) 74 | // } 75 | // else 76 | 77 | val it = item._2.iterator 78 | 79 | if(item._1 == item._2.min) 80 | { 81 | while(it.hasNext) 82 | { 83 | val next = it.next 84 | outputList.prepend((next, Set(item._1))) 85 | } 86 | } 87 | else if(sizeNeighborhood > limit && item._1 %limit==0) 88 | { 89 | while(it.hasNext) 90 | { 91 | val next = it.next 92 | val hash = item._1 + (next % (limit-1)) + 1 93 | outputList.prepend((item._1, Set(hash))) 94 | outputList.prepend((hash, Set(next))) 95 | } 96 | 97 | } 98 | else 99 | { 100 | while(it.hasNext) 101 | { 102 | val next = it.next 103 | outputList.prepend((item._1, Set(next))) 104 | outputList.prepend((next, Set(item._1))) 105 | } 106 | 107 | } 108 | 109 | outputList.toIterable 110 | } 111 | 112 | def reduceMessageByKey(a : Set[Long], b : Set[Long]) : Set[Long] = 113 | { 114 | a++b 115 | } 116 | 117 | def largeStarReduceOptimized(item: (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 118 | { 119 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 120 | 121 | var min = Math.min(item._1, item._2.min) 122 | val it2 = item._2.iterator 123 | var valueList : Set[Long] = Set() 124 | 125 | // while (it.hasNext) { 126 | // val next = it.next 127 | // valueList = valueList + next 128 | // if (next < min) { 129 | // min = next 130 | // } 131 | // } 132 | // 133 | // val it2 = valueList.iterator 134 | while (it2.hasNext) { 135 | val next = it2.next 136 | if (next > item._1) { 137 | outputList.prepend((next, Set(min))) 138 | } 139 | } 140 | 141 | outputList.prepend((item._1, Set(min))) 142 | 143 | // outputList.prepend((Option.empty, Option(item._1, new AlternatingMessage(item._1 == min)))) 144 | 145 | outputList.toIterable 146 | } 147 | 148 | def largeStarMap(item: (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 149 | { 150 | val sizeNeighborhood = item._2.toSet.size 151 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 152 | 153 | // if(info.isDefined && info.get.isMarkedAsRootNode) 154 | // { 155 | // outputList.prepend((Option(item._2, item._1), Option.empty)) 156 | // } 157 | // else 158 | 159 | val it = item._2.iterator 160 | 161 | while(it.hasNext) 162 | { 163 | val next = it.next 164 | 
outputList.prepend((item._1, Set(next))) 165 | outputList.prepend((next, Set(item._1))) 166 | } 167 | 168 | outputList.toIterable 169 | } 170 | 171 | def largeStarReduce(item : (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 172 | { 173 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 174 | 175 | var min = item._1 176 | val it = item._2.iterator 177 | var valueList : Set[Long] = Set() 178 | 179 | while (it.hasNext) { 180 | val next = it.next 181 | valueList = valueList + next 182 | if (next < min) { 183 | min = next 184 | } 185 | } 186 | 187 | val it2 = valueList.iterator 188 | while (it2.hasNext) { 189 | val next = it2.next 190 | if (next > item._1) { 191 | outputList.prepend((next, Set(min))) 192 | } 193 | } 194 | 195 | outputList.prepend((item._1, Set(min))) 196 | 197 | outputList.toIterable 198 | } 199 | } -------------------------------------------------------------------------------- /src/ccmr/CcmrMain.scala: -------------------------------------------------------------------------------- 1 | package ccmr 2 | 3 | import java.io.FileWriter 4 | import scala.collection.immutable.TreeSet 5 | import scala.collection.mutable.ListBuffer 6 | import org.apache.spark.Accumulator 7 | import org.apache.spark.SparkContext._ 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.rdd.RDD 10 | import util.CCUtil 11 | import util.CCProperties 12 | 13 | // not working for livejournal dataset 14 | object CcmrMain 15 | { 16 | def emitBlue( item : ( Long, CcmrMessage ) ) : Iterable[( Long, CcmrMessage )] = 17 | { 18 | var outputList : ListBuffer[( Long, CcmrMessage )] = new ListBuffer 19 | 20 | val vSource = item._1 21 | val it = item._2.cc.iterator 22 | 23 | var isLocalMaxState = false 24 | if(it.hasNext) 25 | { 26 | val vFirst = it.next 27 | 28 | if(vSource < vFirst) 29 | { 30 | isLocalMaxState = true 31 | outputList.prepend((vSource, new CcmrMessage(TreeSet(vFirst), false))) 32 | } 33 | 34 | var vDest = vFirst 35 | while(it.hasNext) 36 | { 37 | vDest = it.next 38 | if(isLocalMaxState) 39 | { 40 | outputList.prepend((vSource, new CcmrMessage(TreeSet(vDest), false))) 41 | } else 42 | { 43 | outputList.prepend((vFirst, new CcmrMessage(TreeSet(vDest), false))) 44 | outputList.prepend((vDest, new CcmrMessage(TreeSet(vFirst), false))) 45 | outputList.prepend((vSource, new CcmrMessage(TreeSet(), true))) 46 | } 47 | } 48 | if(vSource < vDest && !isLocalMaxState) 49 | { 50 | outputList.prepend((vSource, new CcmrMessage(TreeSet(vFirst), true))) 51 | } 52 | 53 | } else 54 | { 55 | //outputList.prepend((vSource, new CcmrMessage(TreeSet(), false))) 56 | } 57 | 58 | 59 | 60 | outputList.toIterable 61 | } 62 | 63 | def reduceBlue( item1 : CcmrMessage, item2 : CcmrMessage ) : CcmrMessage = 64 | { 65 | new CcmrMessage( item1.cc ++ item2.cc, item1.iterationNeeded || item2.iterationNeeded ) 66 | } 67 | 68 | def main( args : Array[String] ) : Unit = 69 | { 70 | val timeBegin = System.currentTimeMillis() 71 | 72 | val property = new CCProperties("CCMR", args(0)).load.getImmutable 73 | 74 | val util = new CCUtil(property) 75 | val spark = util.getSparkContext() 76 | 77 | val timeSparkLoaded = System.currentTimeMillis() 78 | val file = spark.textFile( property.dataset , property.sparkPartition) 79 | 80 | util.io.printFileStart(property.appName) 81 | 82 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 83 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 84 | 85 | def toTreeSet(data : Set[Long]) : TreeSet[Long] = 86 | { 87 | var toReturn : TreeSet[Long] = TreeSet() 
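// copy the unordered Set into an immutable TreeSet so that CcmrMessage neighbours are
// iterated in ascending vertex order (the loop below is equivalent to TreeSet[Long]() ++ data)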
88 | val it = data.iterator 89 | while (it.hasNext) 90 | { 91 | toReturn = toReturn + it.next 92 | } 93 | 94 | toReturn 95 | } 96 | 97 | var ret = fusedData.map( item => ( item._1, new CcmrMessage( toTreeSet(item._2.toSet), false) ) ) 98 | 99 | val timeDataLoaded = System.currentTimeMillis() 100 | // // ccmr not correctly handle isolated vertices, these must be removed before starting the algorithm 101 | // ret = ret.filter(t => !t._2.cc.isEmpty) 102 | 103 | var control = false; 104 | var step = 0 105 | 106 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 107 | val reduceInputSizeAccumulator = spark.accumulator(0L) 108 | 109 | while ( !control ) { 110 | val timeStepStart = System.currentTimeMillis() 111 | 112 | ret = ret.flatMap( item => emitBlue( item ) ) 113 | 114 | if(property.printMessageStat) 115 | { 116 | val previousMessageSize = reduceInputSizeAccumulator.value 117 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 118 | 119 | ret.foreach(t => reduceInputSizeAccumulator += t._2.cc.size + 1) 120 | reduceInputMessageNumberAccumulator += ret.count 121 | 122 | util.printMessageStep(step + 1, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 123 | } 124 | 125 | ret = ret.reduceByKey( reduceBlue ).cache 126 | 127 | val controlMap = ret.map(t => t._2.voteToHalt) 128 | // val test = controlMap.filter(t=>(!t)).count 129 | // util.io.printStat(test, "active") 130 | control = controlMap.reduce{case (a,b) => a && b} 131 | 132 | val timeStepBlue = System.currentTimeMillis() 133 | 134 | step = step + 1 135 | util.io.printTime( timeStepStart, timeStepBlue, "blue" ) 136 | util.printTimeStep(step, timeStepBlue-timeStepStart) 137 | } 138 | 139 | val timeEnd = System.currentTimeMillis() 140 | 141 | 142 | util.testEnded( ret.filter(t => !t._2.cc.isEmpty).map(t => (t._1, t._2.cc.size + 1)), 143 | step, 144 | timeBegin, 145 | timeEnd, 146 | timeSparkLoaded, 147 | timeDataLoaded, 148 | reduceInputMessageNumberAccumulator.value, 149 | reduceInputSizeAccumulator.value) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/cracker/CrackerMain.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | import org.apache.spark.SparkContext._ 4 | import org.apache.spark.SparkContext 5 | import scala.collection.mutable.ListBuffer 6 | import java.io.FileWriter 7 | import org.apache.spark.rdd.RDD 8 | import util.CCUtil 9 | import util.CCUtil 10 | import util.CCUtil 11 | import util.CCProperties 12 | 13 | object CrackerTreeMain { 14 | 15 | def main(args : Array[String]) : Unit = 16 | { 17 | val timeBegin = System.currentTimeMillis() 18 | 19 | /* 20 | * additional properties: 21 | * crackerUseUnionInsteadOfJoin : true | false 22 | * crackerCoalescePartition : true | false 23 | */ 24 | 25 | val propertyLoad = new CCProperties("CRACKER_TREE_SPLIT", args(0)).load 26 | val crackerUseUnionInsteadOfJoin = propertyLoad.getBoolean("crackerUseUnionInsteadOfJoin", true) 27 | val crackerCoalescePartition = propertyLoad.getBoolean("crackerCoalescePartition", true) 28 | val crackerForceEvaluation = propertyLoad.getBoolean("crackerForceEvaluation", true) 29 | val crackerSkipPropagation = propertyLoad.getBoolean("crackerSkipPropagation", false) 30 | 31 | val property = propertyLoad.getImmutable 32 | val cracker = new CrackerAlgorithm(property) 33 | 34 | val util = new CCUtil(property) 35 | val spark = 
util.getSparkContext() 36 | val stats = new CrackerStats(property, util, spark) 37 | 38 | val timeSparkLoaded = System.currentTimeMillis() 39 | val file = spark.textFile(property.dataset, property.sparkPartition) 40 | 41 | util.io.printFileStart(property.appName) 42 | 43 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 44 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 45 | 46 | var ret = fusedData.map(item => (item._1, new CrackerTreeMessageIdentification((item._2.toSet + item._1).min, item._2.toSet))) 47 | 48 | val timeDataLoaded = System.currentTimeMillis() 49 | 50 | var control = false; 51 | var step = 0 52 | 53 | var treeRDD : Option[RDD[(Long, CrackerTreeMessageTree)]] = Option.empty 54 | 55 | // if not done, CC of size 1 are not recognized 56 | treeRDD = Option.apply(ret.map(t => (t._1, new CrackerTreeMessageTree(-1, Set())))) 57 | 58 | while (!control) { 59 | // simplification step 60 | val timeStepStart = System.currentTimeMillis() 61 | 62 | stats.printSimplification(step, ret) 63 | 64 | ret = ret.flatMap(item => cracker.emitBlue(item, false)) 65 | 66 | stats.printMessageStats(step + 1, ret) 67 | 68 | ret = ret.reduceByKey(cracker.reduceBlue).cache 69 | 70 | val active = ret.count 71 | control = active == 0 72 | 73 | val timeStepBlue = System.currentTimeMillis() 74 | util.printTimeStep(step + 1, timeStepBlue-timeStepStart) 75 | 76 | if (!control) { 77 | stats.printSimplification(step+1, ret) 78 | // reduction step 79 | val tmp = ret.flatMap(item => cracker.emitRed(item)) 80 | 81 | stats.printMessageStats(step + 2, tmp) 82 | 83 | val tmpReduced = tmp.reduceByKey(cracker.reduceRed) 84 | 85 | ret = tmpReduced.filter(t => t._2.first.isDefined).map(t => (t._1, t._2.first.get)) 86 | treeRDD = cracker.mergeTree(treeRDD, tmpReduced.filter(t => t._2.second.isDefined).map(t => (t._1, t._2.second.get)), crackerUseUnionInsteadOfJoin, crackerForceEvaluation) 87 | 88 | val timeStepEnd = System.currentTimeMillis() 89 | step = step + 2 90 | util.io.printTimeStep(timeStepStart, timeStepBlue, timeStepEnd) 91 | util.printTimeStep(step, timeStepEnd-timeStepBlue) 92 | } else { 93 | step = step + 1 94 | util.io.printTime(timeStepStart, timeStepBlue, "blue") 95 | } 96 | } 97 | 98 | stats.printSimplification(step, ret) 99 | 100 | if(!crackerSkipPropagation) 101 | { 102 | 103 | var treeRDDPropagationTmp = treeRDD.get 104 | 105 | if(crackerUseUnionInsteadOfJoin && crackerCoalescePartition) 106 | { 107 | val timeStepStart = System.currentTimeMillis() 108 | treeRDDPropagationTmp = treeRDDPropagationTmp.coalesce(property.sparkPartition) 109 | val timeStepBlue = System.currentTimeMillis() 110 | util.io.printTime(timeStepStart, timeStepBlue, "coalescing") 111 | } 112 | 113 | stats.printMessageStats(step, treeRDDPropagationTmp) 114 | 115 | var treeRDDPropagation = treeRDDPropagationTmp.reduceByKey(cracker.reducePrepareDataForPropagation).map(t => (t._1, t._2.getMessagePropagation(t._1))).cache 116 | 117 | control = false 118 | while (!control) { 119 | val timeStepStart = System.currentTimeMillis() 120 | treeRDDPropagation = treeRDDPropagation.flatMap(item => cracker.mapPropagate(item)) 121 | 122 | stats.printMessageStats(step + 1, treeRDDPropagation) 123 | 124 | treeRDDPropagation = treeRDDPropagation.reduceByKey(cracker.reducePropagate).cache 125 | control = treeRDDPropagation.map(t => t._2.min != -1).reduce { case (a, b) => a && b } 126 | 127 | step = step + 1 128 | val timeStepBlue = System.currentTimeMillis() 129 | util.io.printTime(timeStepStart, timeStepBlue, "propagation") 130 
| util.printTimeStep(step, timeStepBlue-timeStepStart) 131 | } 132 | 133 | val timeEnd = System.currentTimeMillis() 134 | 135 | util.testEnded(treeRDDPropagation.map(t => (t._2.min, 1)).reduceByKey { case (a, b) => a + b }, 136 | step, 137 | timeBegin, 138 | timeEnd, 139 | timeSparkLoaded, 140 | timeDataLoaded, 141 | stats.reduceInputMessageNumberAccumulator.value, 142 | stats.reduceInputSizeAccumulator.value, 143 | getBitmaskStat(crackerUseUnionInsteadOfJoin,crackerCoalescePartition,crackerForceEvaluation)) 144 | 145 | } else 146 | { 147 | val timeEnd = System.currentTimeMillis() 148 | val vertexNumber = fusedData.count 149 | 150 | util.testEnded(treeRDD.get.map(t => (1L, 1)).reduceByKey { case (a, b) => a + b }, 151 | step, 152 | timeBegin, 153 | timeEnd, 154 | timeSparkLoaded, 155 | timeDataLoaded, 156 | stats.reduceInputMessageNumberAccumulator.value + cracker.getMessageNumberForPropagation(step, vertexNumber), 157 | stats.reduceInputSizeAccumulator.value + cracker.getMessageSizeForPropagation(step, vertexNumber), 158 | getBitmaskStat(crackerUseUnionInsteadOfJoin,crackerCoalescePartition,crackerForceEvaluation)) 159 | } 160 | } 161 | 162 | def bool2int(b:Boolean) = if (b) 1 else 0 163 | 164 | def getBitmaskStat( crackerUseUnionInsteadOfJoin : Boolean, 165 | crackerCoalescePartition : Boolean, 166 | crackerForceEvaluation : Boolean) : String = 167 | { 168 | bool2int(crackerUseUnionInsteadOfJoin).toString+bool2int(crackerCoalescePartition).toString+bool2int(crackerForceEvaluation).toString 169 | } 170 | 171 | } -------------------------------------------------------------------------------- /src/util/CCUtilIO.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.io.FileWriter 4 | import java.text.DecimalFormat 5 | 6 | import org.apache.spark.SparkContext._ 7 | import org.apache.spark.SparkContext 8 | import org.apache.spark.rdd.RDD 9 | 10 | import com.google.common.base.Joiner 11 | 12 | class CCUtilIO(property : CCPropertiesImmutable) extends Serializable 13 | { 14 | val fileStatDescription = "algorithmName,dataset,partition,step,timeAll,timeLoadingAndComputation,timeComputation,reduceInputMessageNumber,reduceInputSize,ccNumber,ccNumberNoIsolatedVertices,ccMaxSize,customColumn" 15 | val fileSimplificationDescritpion = "dataset,step,activeVertices,activeVerticesNormalized" 16 | 17 | def printStat( data : Long, description : String ) = 18 | { 19 | val printFile = new FileWriter( "time.txt", true ) 20 | printFile.write( description + ": " + data + "\n" ) 21 | printFile.close 22 | } 23 | 24 | def printSimplification( step : Int, activeVertices : Long, initialVertices : Long ) = 25 | { 26 | val joiner = Joiner.on(",") 27 | 28 | val printFile = new FileWriter( "simplification.txt", true ) 29 | 30 | val token : Array[Object] = Array(property.dataset, step.toString, activeVertices.toString, ((((activeVertices.toDouble * 100) / initialVertices)*100).round.toDouble / 100).toString) 31 | printFile.write(joiner.join(token)+ "\n" ) 32 | 33 | printFile.close 34 | } 35 | 36 | def printSimplification( step : Int, activeVertices : Long, initialVertices : Long , activeEdges : Double, degreeMax : Int) = 37 | { 38 | val printFile = new FileWriter( "simplification.txt", true ) 39 | 40 | val token : Array[Object] = Array( property.dataset, 41 | step.toString, 42 | activeVertices.toString, 43 | ((((activeVertices.toDouble * 100) / initialVertices)*100).round.toDouble / 100).toString, 44 | property.algorithmName, 45 | activeEdges.toString, 
46 | (activeEdges / activeVertices).toString, 47 | degreeMax.toString) 48 | printFile.write(token.mkString(",")+ "\n" ) 49 | 50 | printFile.close 51 | } 52 | 53 | def printTimeStep( step : Int, time : Long) = 54 | { 55 | val joiner = Joiner.on(",") 56 | 57 | val printFile = new FileWriter( "timeStep.txt", true ) 58 | 59 | // dataset, algorithmName, step, time 60 | val token : Array[Object] = Array(property.dataset, property.algorithmName, step.toString, time.toString) 61 | printFile.write(joiner.join(token)+ "\n" ) 62 | 63 | printFile.close 64 | } 65 | 66 | def printMessageStep( step : Int, messageNumber : Long, messageSize : Long) = 67 | { 68 | val joiner = Joiner.on(",") 69 | 70 | val printFile = new FileWriter( "messageStep.txt", true ) 71 | 72 | val token : Array[Object] = Array(property.dataset, property.algorithmName, step.toString, messageNumber.toString, messageSize.toString) 73 | printFile.write(joiner.join(token)+ "\n" ) 74 | 75 | printFile.close 76 | } 77 | 78 | def printAllStat( algorithmName : String, 79 | dataset : String, 80 | partition : Int, 81 | step : Int, 82 | timaAll : Long, 83 | timeLoadingAndComputation : Long, 84 | timeComputation : Long, 85 | reduceInputMessageNumber : Long, 86 | reduceInputSize : Long, 87 | ccNumber : Long, 88 | ccNumberNoIsolatedVertices : Long, 89 | ccMaxSize : Int, 90 | customColumnValue : String) = 91 | { 92 | val printFile = new FileWriter( "stats.txt", true ) 93 | val joiner = Joiner.on(",") 94 | val token : Array[Object] = Array(algorithmName, dataset, partition.toString, step.toString, timaAll.toString, timeLoadingAndComputation.toString, timeComputation.toString, reduceInputMessageNumber.toString, reduceInputSize.toString, ccNumber.toString, ccNumberNoIsolatedVertices.toString, ccMaxSize.toString, customColumnValue) 95 | 96 | printFile.write(joiner.join(token)+ "\n" ) 97 | printFile.close 98 | } 99 | 100 | def printCCDistribution(rdd : RDD[(Long, Int)]) = 101 | { 102 | val printFile = new FileWriter( "distribution.txt", true ) 103 | val joiner = Joiner.on(",") 104 | 105 | val ccDistribution = rdd.map(t=>(t._2,1)).reduceByKey{case(a,b)=>a+b}.map(t=>t._1+","+t._2+"\n").reduce{case(a,b)=>a+b} 106 | 107 | // val token : Array[Object] = Array(algorithmName, dataset, partition.toString, hybridMessageSizeBound.toString, step.toString, timaAll.toString, timeLoadingAndComputation.toString, timeComputation.toString, reduceInputMessageNumber.toString, reduceInputSize.toString, ccNumber.toString, ccMaxSize.toString) 108 | // 109 | // printFile.write(joiner.join(token)+ "\n" ) 110 | printFile.write(ccDistribution+ "\n" ) 111 | 112 | printFile.close 113 | } 114 | 115 | def printEdgelist( data : RDD[(Long,Long)] ) = 116 | { 117 | val collected = data.collect.iterator 118 | val printFile = new FileWriter( "edgelist.txt", true ) 119 | while(collected.hasNext) 120 | { 121 | val next = collected.next 122 | printFile.write( next._1+" "+next._2 + "\n" ) 123 | } 124 | printFile.close 125 | } 126 | 127 | def printFileStart(description : String) = 128 | { 129 | val printFile = new FileWriter( "time.txt", true ) 130 | printFile.write("\n"+ description+": START\n" ) 131 | printFile.close 132 | } 133 | 134 | def printFileEnd(description : String) = 135 | { 136 | val printFile = new FileWriter( "time.txt", true ) 137 | printFile.write( description+": END\n" ) 138 | printFile.close 139 | } 140 | 141 | def printTime( start : Long, end : Long, description : String ) = 142 | { 143 | val printFile = new FileWriter( "time.txt", true ) 144 | printFile.write( description + 
": " + ( end - start ) + "\n" ) 145 | printFile.close 146 | } 147 | 148 | def printStep( step : Int ) = 149 | { 150 | val printFile = new FileWriter( "time.txt", true ) 151 | printFile.write( "step: "+ step + "\n" ) 152 | printFile.close 153 | } 154 | 155 | def printTimeStep( start : Long, red : Long, end : Long ) = 156 | { 157 | val printFile = new FileWriter( "time.txt", true ) 158 | printFile.write( "blue: " + ( red - start ) + " red: " + ( end - red ) + " all: " + ( end - start ) + "\n" ) 159 | printFile.close 160 | } 161 | 162 | def printToFile( file : String, data : String ) = 163 | { 164 | val printFile = new FileWriter( file, true ) 165 | printFile.write( data ) 166 | printFile.close 167 | } 168 | 169 | } -------------------------------------------------------------------------------- /src/util/CCUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import org.apache.spark.SparkContext._ 4 | import org.apache.spark.rdd.RDD 5 | import scala.collection.mutable.ListBuffer 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.SparkConf 8 | 9 | class CCUtil(property: CCPropertiesImmutable) extends Serializable { 10 | val io = new CCUtilIO(property) 11 | var vertexNumber = 0L 12 | 13 | def getSparkContext(): SparkContext = { 14 | val conf = new SparkConf() 15 | .setMaster(property.sparkMaster) 16 | .setAppName(property.appName) 17 | .set("spark.executor.memory", property.sparkExecutorMemory) 18 | .set("spark.storage.blockManagerSlaveTimeoutMs", property.sparkBlockManagerSlaveTimeoutMs) 19 | .setJars(Array(property.jarPath)) 20 | 21 | if (property.sparkCoresMax > 0) { 22 | conf.set("spark.cores.max", property.sparkCoresMax.toString) 23 | val executorCore = property.sparkCoresMax / property.sparkExecutorInstances 24 | conf.set("spark.executor.cores", executorCore.toString) 25 | } 26 | if (property.sparkExecutorInstances > 0) { 27 | conf.set("spark.executor.instances", property.sparkExecutorInstances.toString) 28 | } 29 | 30 | val spark = new SparkContext(conf) 31 | 32 | spark.setCheckpointDir(".") 33 | 34 | spark 35 | } 36 | 37 | // return edgelist and edge associated to each vertex 38 | def loadEdgeFromFile(data: RDD[String]): (RDD[(Long, Long)], RDD[(Long, Iterable[Long])]) = { 39 | val toReturnEdgeList = data.flatMap(line => { 40 | val splitted = line.split(property.separator) 41 | if (splitted.size >= 1) { 42 | try { 43 | Array((splitted(0).toLong, splitted(1).toLong), (splitted(1).toLong, splitted(0).toLong)) 44 | } catch { 45 | case e: Exception => Array[(Long, Long)]() 46 | } 47 | } else { 48 | Array[(Long, Long)]() 49 | } 50 | }) 51 | 52 | val toReturnVertex = toReturnEdgeList.distinct.groupByKey 53 | 54 | if (property.printMessageStat) { 55 | val edgeNumber = toReturnEdgeList.count / 2 56 | vertexNumber = toReturnVertex.count 57 | 58 | io.printStat(edgeNumber, "edgeNumber") 59 | io.printStat(vertexNumber, "vertexNumber") 60 | } 61 | 62 | (toReturnEdgeList, toReturnVertex) 63 | } 64 | 65 | // load from a file in the format of 66 | // vertexID, arcID 67 | def loadVertexEdgeFile(data: RDD[String]): (RDD[(Long, Long)], RDD[(Long, Iterable[Long])]) = { 68 | def mapToEdgeList(item: (String, Iterable[Long])): Iterable[(Long, Long)] = { 69 | var outputList: ListBuffer[(Long, Long)] = new ListBuffer 70 | 71 | val it = item._2.iterator 72 | 73 | while (it.hasNext) { 74 | val next = it.next 75 | val it2 = item._2.iterator 76 | 77 | while (it2.hasNext) { 78 | val next2 = it2.next 79 | 80 | if (next != next2) { 81 | 
outputList.prepend((next, next2)) 82 | } 83 | } 84 | } 85 | 86 | outputList.toIterable 87 | } 88 | 89 | val toReturnEdgeList = data.flatMap(line => { 90 | val splitted = line.split(",") 91 | if (splitted.size >= 1) { 92 | try { 93 | Array((splitted(1), splitted(0).toLong)) 94 | } catch { 95 | case e: Exception => Array[(String, Long)]() 96 | } 97 | } else { 98 | Array[(String, Long)]() 99 | } 100 | }) 101 | 102 | val edgeList = toReturnEdgeList.groupByKey.flatMap(mapToEdgeList) 103 | 104 | // io.printEdgelist(edgeList) 105 | 106 | val toReturnVertex = edgeList.groupByKey 107 | 108 | if (property.printMessageStat) { 109 | val edgeNumber = toReturnEdgeList.count 110 | val vertexNumber = toReturnVertex.count 111 | 112 | io.printStat(edgeNumber, "edgeNumber") 113 | io.printStat(vertexNumber, "vertexNumber") 114 | } 115 | 116 | (edgeList, toReturnVertex) 117 | } 118 | 119 | def getCCNumber(rdd: RDD[(Long, Int)]) = { 120 | rdd.count 121 | } 122 | 123 | def getCCNumberNoIsolatedVertices(rdd: RDD[(Long, Int)]) = { 124 | rdd.filter(t => t._2 != 1).count 125 | } 126 | 127 | def getCCMaxSize(rdd: RDD[(Long, Int)]) = { 128 | rdd.map(t => t._2).max 129 | } 130 | 131 | def printSimplification(step: Int, activeVertices: Long) = { 132 | io.printSimplification(step, activeVertices, vertexNumber) 133 | } 134 | 135 | def printSimplification(step: Int, activeVertices: Long, activeEdges: Double, degreeMax: Int) = { 136 | io.printSimplification(step, activeVertices, vertexNumber, activeEdges, degreeMax) 137 | } 138 | 139 | def printTimeStep(step: Int, time: Long) = { 140 | if (!property.printMessageStat) 141 | io.printTimeStep(step, time) 142 | } 143 | 144 | def printMessageStep(step: Int, messageNumber: Long, messageSize: Long) = { 145 | io.printMessageStep(step, messageNumber, messageSize) 146 | } 147 | 148 | def testEnded(rdd: RDD[(Long, Int)], step: Int, timeBegin: Long, timeEnd: Long, timeSparkLoaded: Long, timeDataLoaded: Long, reduceInputMessageNumber: Long, reduceInputSize: Long, bitmask: String = "", optimization: String = "") = { 149 | io.printTime(timeBegin, timeEnd, "all") 150 | io.printTime(timeSparkLoaded, timeEnd, "allComputationAndLoadingGraph") 151 | io.printTime(timeDataLoaded, timeEnd, "allComputation") 152 | io.printStep(step) 153 | io.printStat(reduceInputMessageNumber, "reduceInputMessageNumber") 154 | io.printStat(reduceInputSize, "reduceInputSize") 155 | io.printFileEnd(property.appName) 156 | 157 | io.printAllStat(property.algorithmName, 158 | property.dataset, 159 | property.sparkPartition, 160 | step, 161 | (timeEnd - timeBegin), 162 | (timeEnd - timeSparkLoaded), 163 | (timeEnd - timeDataLoaded), 164 | reduceInputMessageNumber, 165 | reduceInputSize, 166 | getCCNumber(rdd), 167 | getCCNumberNoIsolatedVertices(rdd), 168 | getCCMaxSize(rdd), 169 | property.customColumnValue) 170 | 171 | if (property.printCCDistribution) 172 | io.printCCDistribution(rdd) 173 | } 174 | 175 | /*def testEnded(ccNumber : Long, ccNumberNoIsolatedVertices : Long, step : Int, timeBegin : Long, timeEnd : Long, timeSparkLoaded : Long, timeDataLoaded : Long, reduceInputMessageNumber : Long, reduceInputSize : Long) = 176 | { 177 | io.printTime( timeBegin, timeEnd, "all" ) 178 | io.printTime( timeSparkLoaded, timeEnd, "allComputationAndLoadingGraph" ) 179 | io.printTime( timeDataLoaded, timeEnd, "allComputation" ) 180 | io.printStep( step ) 181 | io.printStat(reduceInputMessageNumber, "reduceInputMessageNumber") 182 | io.printStat(reduceInputSize, "reduceInputSize") 183 | io.printFileEnd(property.appName) 184 | 185 
| io.printAllStat( property.algorithmName, 186 | property.dataset, 187 | property.sparkPartition, 188 | step, 189 | (timeEnd - timeBegin), 190 | (timeEnd - timeSparkLoaded) , 191 | (timeEnd - timeDataLoaded), 192 | reduceInputMessageNumber, 193 | reduceInputSize, 194 | ccNumber, 195 | ccNumberNoIsolatedVertices, 196 | 0, 197 | property.customColumnValue) 198 | }*/ 199 | } 200 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | cracker 5 | cracker 6 | 0.0.1-SNAPSHOT 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | org.apache.spark 15 | spark-core_2.10 16 | 2.1.1 17 | 18 | 19 | org.scala-lang 20 | scala-library 21 | 2.10.6 22 | 23 | 24 | 25 | src 26 | 27 | 28 | 29 | net.alchim31.maven 30 | scala-maven-plugin 31 | 3.2.1 32 | 33 | 34 | org.apache.maven.plugins 35 | maven-compiler-plugin 36 | 2.0.2 37 | 38 | 1.7 39 | 1.7 40 | 41 | 42 | 43 | org.apache.maven.plugins 44 | maven-jar-plugin 45 | 2.2 46 | 47 | 48 | 49 | org.apache.maven.plugins 50 | maven-shade-plugin 51 | 1.5 52 | 53 | 54 | package 55 | 56 | shade 57 | 58 | 59 | true 60 | allinone 61 | 62 | 63 | *:* 64 | 65 | 66 | 67 | 69 | reference.conf 70 | 71 | 73 | 74 | akka.Main 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | org.apache.maven.plugins 108 | maven-assembly-plugin 109 | 2.2-beta-4 110 | 111 | 112 | jar-with-dependencies 113 | 114 | 115 | 116 | util.Main 117 | 118 | 119 | 120 | 121 | 122 | package 123 | 124 | single 125 | 126 | 127 | 128 | 129 | 130 | org.scala-tools 131 | maven-scala-plugin 132 | 133 | 134 | 135 | compile 136 | 137 | compile 138 | 139 | compile 140 | 141 | 142 | 143 | test-compile 144 | 145 | testCompile 146 | 147 | test-compile 148 | 149 | 150 | 151 | process-resources 152 | 153 | compile 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | net.alchim31.maven 171 | scala-maven-plugin 172 | 173 | 174 | scala-compile-first 175 | process-resources 176 | 177 | add-source 178 | compile 179 | 180 | 181 | 182 | scala-test-compile 183 | process-test-resources 184 | 185 | testCompile 186 | 187 | 188 | 189 | 190 | 191 | org.apache.maven.plugins 192 | maven-compiler-plugin 193 | 194 | 195 | compile 196 | 197 | compile 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | org.apache.maven.plugins 207 | maven-jar-plugin 208 | 209 | 210 | **/log4j.properties 211 | 212 | 213 | 214 | true 215 | util.Main 216 | dependency-jars/ 217 | 218 | 219 | 220 | 221 | 222 | 223 | maven-assembly-plugin 224 | 225 | 226 | package 227 | 228 | single 229 | 230 | 231 | 232 | 233 | 234 | jar-with-dependencies 235 | 236 | 237 | 238 | 239 | 240 | 241 | -------------------------------------------------------------------------------- /src/cracker/CrackerAlgorithm.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | import org.apache.spark.SparkContext._ 4 | import scala.collection.mutable.ListBuffer 5 | import org.apache.spark.rdd.RDD 6 | import org.apache.spark.SparkContext 7 | import java.io.FileWriter 8 | import util.CCPropertiesImmutable 9 | 10 | @serializable 11 | class CrackerAlgorithm(property : CCPropertiesImmutable) { 12 | def mapPropagate(item : (Long, CrackerTreeMessagePropagation)) : Iterable[(Long, CrackerTreeMessagePropagation)] = 13 | { 14 | var outputList : ListBuffer[(Long, 
CrackerTreeMessagePropagation)] = new ListBuffer 15 | if (item._2.min != -1) { 16 | outputList.prepend((item._1, new CrackerTreeMessagePropagation(item._2.min, Set()))) 17 | val it = item._2.child.iterator 18 | while (it.hasNext) { 19 | val next = it.next 20 | outputList.prepend((next, new CrackerTreeMessagePropagation(item._2.min, Set()))) 21 | } 22 | } else { 23 | outputList.prepend(item) 24 | } 25 | outputList 26 | } 27 | 28 | def reducePropagate(item1 : CrackerTreeMessagePropagation, item2 : CrackerTreeMessagePropagation) : CrackerTreeMessagePropagation = 29 | { 30 | var minEnd = item1.min 31 | if (minEnd == -1) minEnd = item2.min 32 | 33 | new CrackerTreeMessagePropagation(minEnd, item1.child ++ item2.child) 34 | } 35 | 36 | def emitBlue(item : (Long, CrackerTreeMessageIdentification), forceLoadBalancing : Boolean, edgePruning : Boolean = true) : Iterable[(Long, CrackerTreeMessageIdentification)] = 37 | { 38 | var outputList : ListBuffer[(Long, CrackerTreeMessageIdentification)] = new ListBuffer 39 | if (item._2.min == item._1 && (item._2.neigh.isEmpty || (item._2.neigh.size == 1 && item._2.neigh.contains(item._1)))) { 40 | // outputList.prepend( ( item._1, new CrackerTreeMessage( item._2.min, Set()) ) ) 41 | } else { 42 | 43 | val min = item._2.min 44 | 45 | if (item._2.neigh.isEmpty) { 46 | outputList.prepend((item._1, new CrackerTreeMessageIdentification(min, Set()))) 47 | } else { 48 | outputList.prepend((item._1, new CrackerTreeMessageIdentification(min, Set(min)))) 49 | } 50 | 51 | if (min < item._1 || !forceLoadBalancing || !edgePruning) { 52 | val it = item._2.neigh.iterator 53 | while (it.hasNext) { 54 | val next = it.next 55 | outputList.prepend((next, new CrackerTreeMessageIdentification(min, Set(min)))) 56 | } 57 | } 58 | } 59 | 60 | // val printFile = new FileWriter( "check.txt", true ) 61 | // 62 | // printFile.write("BLUE "+item._1+ "\n" ) 63 | // 64 | // printFile.close 65 | 66 | outputList.toIterable 67 | } 68 | 69 | def emitRed(item : (Long, CrackerTreeMessageIdentification)) : Iterable[(Long, CrackerTreeMessageRedPhase)] = { 70 | 71 | emitRed(item, false) 72 | } 73 | 74 | def emitRed(item : (Long, CrackerTreeMessageIdentification), forceLoadBalancing : Boolean, obliviousSeed : Boolean = true) : Iterable[(Long, CrackerTreeMessageRedPhase)] = { 75 | 76 | var outputList : ListBuffer[(Long, CrackerTreeMessageRedPhase)] = new ListBuffer 77 | 78 | val minset : Set[Long] = item._2.neigh 79 | if (minset.size > 1) { 80 | if(property.loadBalancing || forceLoadBalancing || obliviousSeed) 81 | { 82 | outputList.prepend((item._2.min, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageIdentification(item._2.min, Set(item._2.min))))) 83 | } 84 | else 85 | { 86 | outputList.prepend((item._2.min, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageIdentification(item._2.min, minset)))) 87 | } 88 | var it = minset.iterator 89 | while (it.hasNext) { 90 | val value : Long = it.next 91 | if (value != item._2.min) 92 | outputList.prepend((value, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageIdentification(item._2.min, Set(item._2.min))))) 93 | } 94 | } else if (minset.size == 1 && minset.contains(item._1)) { 95 | outputList.prepend((item._1, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageIdentification(item._1, Set())))) 96 | } 97 | 98 | if (!item._2.neigh.contains(item._1)) { 99 | outputList.prepend((item._2.min, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageTree(-1, Set(item._1))))) 100 | outputList.prepend((item._1, CrackerTreeMessageRedPhase.apply(new 
CrackerTreeMessageTree(item._2.min, Set())))) 101 | } 102 | 103 | // val printFile = new FileWriter( "check.txt", true ) 104 | // 105 | // printFile.write("RED "+item._1+ "\n" ) 106 | // 107 | // printFile.close 108 | 109 | outputList.toIterable 110 | } 111 | 112 | def reduceBlue(item1 : CrackerTreeMessageIdentification, item2 : CrackerTreeMessageIdentification) : CrackerTreeMessageIdentification = 113 | { 114 | val ret = item1.neigh ++ item2.neigh 115 | val min = Math.min(item1.min, item2.min) 116 | 117 | new CrackerTreeMessageIdentification(min, ret) 118 | } 119 | 120 | def mergeMessageIdentification(first : Option[CrackerTreeMessageIdentification], second : Option[CrackerTreeMessageIdentification]) : Option[CrackerTreeMessageIdentification] = 121 | { 122 | if (first.isDefined) { 123 | first.get.merge(second) 124 | } else { 125 | second 126 | } 127 | } 128 | 129 | def mergeMessageTree(first : Option[CrackerTreeMessageTree], second : Option[CrackerTreeMessageTree]) : Option[CrackerTreeMessageTree] = 130 | { 131 | if (first.isDefined) { 132 | first.get.merge(second) 133 | } else { 134 | second 135 | } 136 | } 137 | 138 | def reduceRed(item1 : CrackerTreeMessageRedPhase, item2 : CrackerTreeMessageRedPhase) : CrackerTreeMessageRedPhase = 139 | { 140 | new CrackerTreeMessageRedPhase(mergeMessageIdentification(item1.first, item2.first), mergeMessageTree(item1.second, item2.second)) 141 | } 142 | 143 | def mergeTree(start : Option[RDD[(Long, CrackerTreeMessageTree)]], add : RDD[(Long, CrackerTreeMessageTree)], crackerUseUnionInsteadOfJoin : Boolean, crackerForceEvaluation : Boolean) : Option[RDD[(Long, CrackerTreeMessageTree)]] = 144 | { 145 | if (start.isDefined) { 146 | if(crackerUseUnionInsteadOfJoin) 147 | { 148 | Option.apply(start.get.union(add)) 149 | } else 150 | { 151 | if(crackerForceEvaluation) 152 | { 153 | val treeUpdated = start.get.leftOuterJoin(add).map(t => (t._1, t._2._1.merge(t._2._2).get)) 154 | val forceEvaluation = treeUpdated.count 155 | Option.apply(treeUpdated) 156 | } else 157 | { 158 | Option.apply(start.get.leftOuterJoin(add).map(t => (t._1, t._2._1.merge(t._2._2).get))) 159 | } 160 | } 161 | } else { 162 | Option.apply(add) 163 | } 164 | } 165 | 166 | def mergeTree(spark : SparkContext, start : Option[RDD[(Long, CrackerTreeMessageTree)]], add : Array[(Long, CrackerTreeMessageTree)], crackerUseUnionInsteadOfJoin : Boolean, crackerForceEvaluation : Boolean) : Option[RDD[(Long, CrackerTreeMessageTree)]] = 167 | { 168 | if (start.isDefined) { 169 | if(crackerUseUnionInsteadOfJoin) 170 | { 171 | Option.apply(start.get.union(spark.parallelize(add))) 172 | } else 173 | { 174 | if(crackerForceEvaluation) 175 | { 176 | val treeUpdated = start.get.leftOuterJoin(spark.parallelize(add)).map(t => (t._1, t._2._1.merge(t._2._2).get)) 177 | val forceEvaluation = treeUpdated.count 178 | Option.apply(treeUpdated) 179 | } else 180 | { 181 | Option.apply(start.get.leftOuterJoin(spark.parallelize(add)).map(t => (t._1, t._2._1.merge(t._2._2).get))) 182 | } 183 | } 184 | } else { 185 | Option.apply(spark.parallelize(add)) 186 | } 187 | } 188 | 189 | def mergeTree(start : Option[Array[(Long, CrackerTreeMessageTree)]], add : Array[(Long, CrackerTreeMessageTree)]) : Option[Array[(Long, CrackerTreeMessageTree)]] = 190 | { 191 | if (start.isDefined) { 192 | Option.apply(start.get.union(add)) 193 | } else { 194 | Option.apply(add) 195 | } 196 | } 197 | 198 | def reducePrepareDataForPropagation(a : CrackerTreeMessageTree, b : CrackerTreeMessageTree) : CrackerTreeMessageTree = 199 | { 200 | var 
parent = a.parent 201 | if (parent == -1) parent = b.parent 202 | 203 | new CrackerTreeMessageTree(parent, a.child ++ b.child) 204 | } 205 | 206 | def getMessageNumberForPropagation(step : Int, vertexNumber : Long) = 207 | { 208 | val stepPropagation = (step - 1) / 2 209 | 210 | (vertexNumber * stepPropagation) + vertexNumber 211 | } 212 | 213 | def getMessageSizeForPropagation(step : Int, vertexNumber : Long) = 214 | { 215 | val stepPropagation = (step - 1) / 2 216 | 217 | ((vertexNumber * 2) * stepPropagation) - vertexNumber 218 | } 219 | } -------------------------------------------------------------------------------- /src/crackerAllOptimizations/CrackerAllOptimizationsMain.scala: -------------------------------------------------------------------------------- 1 | package crackerAllOptimizations 2 | 3 | import scala.Array.canBuildFrom 4 | import org.apache.spark.SparkContext 5 | import org.apache.spark.rdd.RDD 6 | import cracker._ 7 | import util.CCProperties 8 | import util.CCUtil 9 | import util.CCPropertiesImmutable 10 | import java.io.PrintWriter 11 | import java.io.File 12 | import java.io.FileWriter 13 | 14 | object CrackerAllOptimizationsMain { 15 | 16 | def printGraph(util: CCUtil, step: Int, description: String, g: RDD[(Long, CrackerTreeMessageIdentification)]) = { 17 | util.io.printToFile("graph.txt", "STEP " + step + "\t[" + description + "]\t" + g.map(t => "{" + t._1 + " " + t._2.toString + "} ").reduce { case (a, b) => a + b } + "\n") 18 | } 19 | 20 | def main(args: Array[String]): Unit = { 21 | val timeBegin = System.currentTimeMillis() 22 | /* 23 | * additional properties: 24 | * crackerUseUnionInsteadOfJoin : true | false 25 | * crackerCoalescePartition : true | false 26 | */ 27 | 28 | val propertyLoad = new CCProperties("CRACKER_ALL", args(0)).load 29 | val crackerUseUnionInsteadOfJoin = propertyLoad.getBoolean("crackerUseUnionInsteadOfJoin", true) 30 | val crackerCoalescePartition = propertyLoad.getBoolean("crackerCoalescePartition", true) 31 | val crackerForceEvaluation = propertyLoad.getBoolean("crackerForceEvaluation", true) 32 | val crackerSkipPropagation = propertyLoad.getBoolean("crackerSkipPropagation", false) 33 | 34 | val (edgePruning, obliviousSeed, fcs) = getOptimizations(propertyLoad.get("optimizations", "111")) 35 | 36 | val property = propertyLoad.getImmutable 37 | val cracker = new CrackerAlgorithm(property) 38 | 39 | val util = new CCUtil(property) 40 | val spark = util.getSparkContext() 41 | val stats = new CrackerStats(property, util, spark) 42 | 43 | val timeSparkLoaded = System.currentTimeMillis() 44 | val file = spark.textFile(property.dataset, property.sparkPartition) 45 | 46 | util.io.printFileStart(property.appName) 47 | 48 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 49 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 50 | 51 | var ret = fusedData.map(item => (item._1, new CrackerTreeMessageIdentification((item._2.toSet + item._1).min, item._2.toSet))) 52 | 53 | val timeDataLoaded = System.currentTimeMillis() 54 | 55 | var control = false; 56 | var step = 0 57 | 58 | var treeRDD: Option[RDD[(Long, CrackerTreeMessageTree)]] = Option.empty 59 | 60 | // if not done, CC of size 1 are not recognized 61 | treeRDD = Option.apply(ret.map(t => (t._1, new CrackerTreeMessageTree(-1, Set())))) 62 | 63 | // what did i do 3 years ago!?!?!? 
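// the heuristic below is undocumented; it appears to force the load-balanced variant of emitRed only on a few hand-picked steps (0, 2, 8, 16, 32), e.g. forceLoadBalancing(2) == true but forceLoadBalancing(3) == false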
64 | def forceLoadBalancing(step: Int): Boolean = { 65 | step == 0 || step == 2 || step == 8 || step == 16 || step == 32 66 | // step < 10 && step % 3 == 0 67 | } 68 | 69 | while (!control) { 70 | // simplification step 71 | val timeStepStart = System.currentTimeMillis() 72 | 73 | stats.printSimplification(step, ret) 74 | 75 | ret = ret.flatMap(item => cracker.emitBlue(item, true, edgePruning)) 76 | 77 | stats.printMessageStats(step + 1, ret) 78 | 79 | ret = ret.reduceByKey(cracker.reduceBlue).cache 80 | 81 | val active = ret.count 82 | // control = active == 0 83 | control = active <= property.switchLocal // set the number where to switch in local mode 84 | 85 | val timeStepBlue = System.currentTimeMillis() 86 | util.printTimeStep(step + 1, timeStepBlue - timeStepStart) 87 | 88 | if (!control) { 89 | stats.printSimplification(step + 1, ret) 90 | // reduction step 91 | val check = step 92 | val tmp = ret.flatMap(item => cracker.emitRed(item, forceLoadBalancing(check), obliviousSeed)) 93 | if (forceLoadBalancing(check)) { 94 | util.io.printStat(check, "loadBalancing triggered") 95 | } 96 | 97 | stats.printMessageStats(step + 2, tmp) 98 | 99 | val tmpReduced = tmp.reduceByKey(cracker.reduceRed) 100 | 101 | ret = tmpReduced.filter(t => t._2.first.isDefined).map(t => (t._1, t._2.first.get)) 102 | treeRDD = cracker.mergeTree(treeRDD, tmpReduced.filter(t => t._2.second.isDefined).map(t => (t._1, t._2.second.get)), crackerUseUnionInsteadOfJoin, crackerForceEvaluation) 103 | 104 | val timeStepEnd = System.currentTimeMillis() 105 | step = step + 2 106 | util.io.printTimeStep(timeStepStart, timeStepBlue, timeStepEnd) 107 | util.printTimeStep(step, timeStepEnd - timeStepBlue) 108 | } else { 109 | step = step + 1 110 | util.io.printTime(timeStepStart, timeStepBlue, "blue") 111 | } 112 | } 113 | 114 | stats.printSimplification(step, ret) 115 | 116 | if (fcs) // run local 117 | { 118 | val timeLocalStart = System.currentTimeMillis() 119 | var retCollected = ret.collect 120 | 121 | control = false 122 | var localStep = 0 123 | 124 | while (!control) { 125 | // simpli 126 | val tmp = retCollected.flatMap(item => cracker.emitRed(item)) 127 | 128 | val tmpReduced = tmp.groupBy(t => t._1).toArray.map { case (group, traversable) => (group, traversable.map(t => t._2).reduce(cracker.reduceRed)) } 129 | 130 | retCollected = tmpReduced.filter(t => t._2.first.isDefined).map(t => (t._1, t._2.first.get)) 131 | treeRDD = cracker.mergeTree(spark, treeRDD, tmpReduced.filter(t => t._2.second.isDefined).map(t => (t._1, t._2.second.get)), crackerUseUnionInsteadOfJoin, crackerForceEvaluation) 132 | 133 | // blue step 134 | retCollected = retCollected.flatMap(item => cracker.emitBlue(item, false)) 135 | 136 | retCollected = retCollected.groupBy(t => t._1).toArray.map { case (group, traversable) => (group, traversable.map(t => t._2).reduce(cracker.reduceBlue)) } 137 | 138 | val active = retCollected.size 139 | // util.io.printStat(active, "active vertices") 140 | control = active == 0 141 | localStep += 2 142 | } 143 | 144 | val timeLocalEnd = System.currentTimeMillis() 145 | util.io.printStat(localStep, "localStep") 146 | util.io.printStat(timeLocalEnd - timeLocalStart, "localTime") 147 | } 148 | 149 | if (!crackerSkipPropagation) { 150 | 151 | var treeRDDPropagationTmp = treeRDD.get 152 | 153 | if (crackerUseUnionInsteadOfJoin && crackerCoalescePartition) { 154 | val timeStepStart = System.currentTimeMillis() 155 | treeRDDPropagationTmp = treeRDDPropagationTmp.coalesce(property.sparkPartition) 156 | val timeStepBlue = 
System.currentTimeMillis() 157 | util.io.printTime(timeStepStart, timeStepBlue, "coalescing") 158 | } 159 | 160 | stats.printMessageStats(step, treeRDDPropagationTmp) 161 | 162 | var treeRDDPropagation = treeRDDPropagationTmp.reduceByKey(cracker.reducePrepareDataForPropagation).map(t => (t._1, t._2.getMessagePropagation(t._1))).cache 163 | 164 | control = false 165 | while (!control) { 166 | val timeStepStart = System.currentTimeMillis() 167 | treeRDDPropagation = treeRDDPropagation.flatMap(item => cracker.mapPropagate(item)) 168 | 169 | stats.printMessageStats(step + 1, treeRDDPropagation) 170 | 171 | treeRDDPropagation = treeRDDPropagation.reduceByKey(cracker.reducePropagate).cache 172 | control = treeRDDPropagation.map(t => t._2.min != -1).reduce { case (a, b) => a && b } 173 | 174 | step = step + 1 175 | val timeStepBlue = System.currentTimeMillis() 176 | util.io.printTime(timeStepStart, timeStepBlue, "propagation") 177 | util.printTimeStep(step, timeStepBlue - timeStepStart) 178 | } 179 | 180 | val timeEnd = System.currentTimeMillis() 181 | 182 | if (property.printLargestCC) { 183 | printLargestCC(spark, property, treeRDDPropagation, parsedData) 184 | } 185 | 186 | if(property.printAll) { 187 | treeRDDPropagation.map(t => t._1+" "+t._2.min).saveAsTextFile(property.outputFile) 188 | } 189 | 190 | util.testEnded(treeRDDPropagation.map(t => (t._2.min, 1)).reduceByKey { case (a, b) => a + b }, 191 | step, 192 | timeBegin, 193 | timeEnd, 194 | timeSparkLoaded, 195 | timeDataLoaded, 196 | stats.reduceInputMessageNumberAccumulator.value, 197 | stats.reduceInputSizeAccumulator.value, 198 | getBitmaskStat(crackerUseUnionInsteadOfJoin, crackerCoalescePartition, crackerForceEvaluation), 199 | propertyLoad.get("optimizations", "111")) 200 | } else { 201 | val timeEnd = System.currentTimeMillis() 202 | val vertexNumber = fusedData.count 203 | 204 | util.testEnded(treeRDD.get.map(t => (1L, 1)).reduceByKey { case (a, b) => a + b }, 205 | step, 206 | timeBegin, 207 | timeEnd, 208 | timeSparkLoaded, 209 | timeDataLoaded, 210 | stats.reduceInputMessageNumberAccumulator.value + cracker.getMessageNumberForPropagation(step, vertexNumber), 211 | stats.reduceInputSizeAccumulator.value + cracker.getMessageSizeForPropagation(step, vertexNumber), 212 | getBitmaskStat(crackerUseUnionInsteadOfJoin, crackerCoalescePartition, crackerForceEvaluation), 213 | propertyLoad.get("optimizations", "111")) 214 | } 215 | } 216 | 217 | def bool2int(b: Boolean) = if (b) 1 else 0 218 | 219 | def printLargestCC(sc: SparkContext, property: CCPropertiesImmutable, tree: RDD[(Long, CrackerTreeMessagePropagation)], edgelist: RDD[(Long, Long)]) = { 220 | val maxCCId = tree.map(t => (t._2.min, 1)).reduceByKey { case (a, b) => a + b }.max()(new Ordering[Tuple2[Long, Int]]() { 221 | override def compare(x: (Long, Int), y: (Long, Int)): Int = 222 | Ordering[Int].compare(x._2, y._2) 223 | })._1 224 | 225 | val maxCCVertex = tree.filter(t => t._2.min == maxCCId).map(t => t._1) 226 | 227 | val maxCCVertexBroadcast = sc.broadcast(maxCCVertex.collect.toSet) 228 | val edgelistFiltered = edgelist.filter { case (s, d) => maxCCVertexBroadcast.value.contains(d) }.collect 229 | 230 | val writer = new FileWriter(property.filenameLargestCC, false) 231 | 232 | var edge = "" 233 | for (edge <- edgelistFiltered) { 234 | writer.write(edge._1 + " " + edge._2 + "\n") 235 | } 236 | 237 | writer.close() 238 | 239 | // edgelistFiltered.saveAsTextFile(property.filenameLargestCC) 240 | } 241 | 242 | def getBitmaskStat(crackerUseUnionInsteadOfJoin: Boolean, 243 
| crackerCoalescePartition: Boolean, 244 | crackerForceEvaluation: Boolean): String = { 245 | bool2int(crackerUseUnionInsteadOfJoin).toString + bool2int(crackerCoalescePartition).toString + bool2int(crackerForceEvaluation).toString 246 | } 247 | 248 | def getOptimizations(data: String): (Boolean, Boolean, Boolean) = { 249 | data match { 250 | case "100" => (true, false, false) 251 | case "010" => (false, true, false) 252 | case "001" => (false, false, true) 253 | case _ => (true, true, true) 254 | } 255 | } 256 | 257 | } -------------------------------------------------------------------------------- /src/sgc/SGCMain.scala: -------------------------------------------------------------------------------- 1 | package sgc 2 | 3 | import java.io.FileWriter 4 | import scala.collection.mutable.ListBuffer 5 | import org.apache.spark.SparkContext._ 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.rdd.RDD 8 | import util.CCUtil 9 | import util.CCProperties 10 | import util.CCProperties 11 | import org.apache.spark.Accumulator 12 | 13 | object SGCMain 14 | { 15 | def forestInitializationStart( node : (Long, Set[Long])) : Iterable[( Long, (Set[Long], Set[Long] ))] = // id, neighbour, nodes for which this node is the min 16 | { 17 | var outputList : ListBuffer[( Long, (Set[Long], Set[Long] ))] = new ListBuffer 18 | 19 | val min = Math.min(node._2.min, node._1) 20 | 21 | if(min != node._1) 22 | { 23 | outputList.prepend( ( min, (Set(), Set(node._1)) )) // tell the min that it is not a singleton 24 | } 25 | 26 | outputList.prepend( ( node._1, (node._2, Set()) )) 27 | 28 | outputList 29 | } 30 | 31 | def forestInitializationReduceStart(a : (Set[Long], Set[Long] ), b : (Set[Long], Set[Long] )) = 32 | { 33 | (a._1 ++ b._1, a._2 ++ b._2) 34 | } 35 | 36 | def forestInitializationEnd( node : (Long, (Set[Long], Set[Long]))) = // id, neighbour, p(v), child(v) 37 | { 38 | var outputList : ListBuffer[( Long, (Set[Long], Long, Set[Long] ))] = new ListBuffer 39 | 40 | if(node._2._2.isEmpty) // if singleton 41 | { 42 | val min = node._2._1.min 43 | 44 | outputList.prepend( ( node._1, (node._2._1, min, Set()) )) 45 | outputList.prepend( ( min, (Set(), -1, Set(node._1)) )) 46 | } else 47 | { 48 | val min = Math.min(node._2._1.min, node._1) 49 | 50 | outputList.prepend( ( node._1, (node._2._1, min, node._2._2) )) 51 | } 52 | 53 | outputList 54 | } 55 | 56 | def forestInitializationEndReduce(a : (Set[Long], Long, Set[Long] ), b : (Set[Long], Long, Set[Long] )) = 57 | { 58 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 ++ b._3) 59 | } 60 | 61 | def starDetectionRule1(node : ( Long, (Set[Long], Long, Set[Long] ))) : Iterable[( Long, (Set[Long], Long, Long, Set[Long] ))] = // neighbor, p(v), p(p(v)), child 62 | { 63 | var outputList : ListBuffer[( Long, (Set[Long], Long, Long, Set[Long] ))] = new ListBuffer 64 | 65 | val it = node._2._3.iterator // child iterator 66 | 67 | while(it.hasNext) 68 | { 69 | val next = it.next 70 | outputList.prepend( ( next, (Set(), -1, node._2._2, Set()) ) ) 71 | } 72 | 73 | if(node._2._2 == node._1) // if min = p(v) == p(p(v)) 74 | { 75 | outputList.prepend( ( node._1, (node._2._1, node._2._2, node._2._2, node._2._3) ) ) 76 | } else 77 | { 78 | outputList.prepend( ( node._1, (node._2._1, node._2._2, -1, node._2._3) ) ) 79 | } 80 | 81 | outputList 82 | } 83 | 84 | def starDetectionReduce1(a : (Set[Long], Long, Long, Set[Long] ), b : (Set[Long], Long, Long, Set[Long] )) = 85 | { 86 | (a._1 ++ b._1, Math.max(a._2, b._2), Math.max(a._3, b._3), a._4 ++ b._4) 87 | } 88 | 89 | def
starDetectionRule1End2Start(node : ( Long, (Set[Long], Long, Long, Set[Long] ))) : Iterable[( Long, (Set[Long], Long, Boolean, Set[Long] ))] = // neighbour, min, s(v), child 90 | { 91 | var outputList : ListBuffer[( Long, (Set[Long], Long, Boolean, Set[Long] ))] = new ListBuffer 92 | 93 | if(node._2._2 == node._2._3) // if p(v) == p(p(v)) 94 | { 95 | outputList.prepend( (node._1, (node._2._1, node._2._2, true, node._2._4)) ) 96 | } else 97 | { 98 | outputList.prepend((node._1, (node._2._1, node._2._2, false, node._2._4)) ) 99 | 100 | if(node._2._3 >= 0) 101 | { 102 | outputList.prepend( (node._2._3, (Set(), -1, false, Set())) ) 103 | // rule 2, the grandparent cannot be a star 104 | } 105 | } 106 | 107 | outputList 108 | } 109 | 110 | def starDetectionReduce2(a : (Set[Long], Long, Boolean, Set[Long] ), b : (Set[Long], Long, Boolean, Set[Long] )) = 111 | { 112 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 && b._3, a._4 ++ b._4) 113 | } 114 | 115 | def starDetectionRule3(node : ( Long, (Set[Long], Long, Boolean, Set[Long] ))) = 116 | { 117 | var outputList : ListBuffer[( Long, (Set[Long], Long, Boolean, Set[Long] ))] = new ListBuffer 118 | 119 | if(!node._2._3) 120 | { 121 | val it = node._2._4.iterator // child iterator 122 | 123 | while(it.hasNext) 124 | { 125 | val next = it.next 126 | outputList.prepend( ( next, (Set(), -1, false, Set()) ) ) 127 | } 128 | } 129 | 130 | outputList.prepend( ( node._1, (node._2._1, node._2._2, node._2._3, node._2._4) ) ) 131 | 132 | outputList 133 | } 134 | 135 | def starDetectionReduce3(a : (Set[Long], Long, Boolean, Set[Long]), b : (Set[Long], Long, Boolean, Set[Long] )) = 136 | { 137 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 && b._3, a._4 ++ b._4) 138 | } 139 | 140 | def conditionalStartHookingPre(node : ( Long, (Set[Long], Long, Boolean ))) = 141 | { 142 | var outputList : ListBuffer[( Long, (Long, Set[Long]) )] = new ListBuffer //min, otherMin 143 | 144 | val it = node._2._1.iterator // neighbour iterator 145 | 146 | while(it.hasNext) 147 | { 148 | val next = it.next 149 | 150 | outputList.prepend( ( next, (-1, Set(node._2._2)) ) ) 151 | } 152 | 153 | outputList.prepend( ( node._1, (node._2._2, Set()) ) ) 154 | 155 | outputList 156 | } 157 | 158 | def getNotMinus(a : Long, b : Long) = 159 | { 160 | if(a == -1) b 161 | else if(b == -1) a 162 | else Math.min(a, b) 163 | } 164 | 165 | def conditionalStarHookingPreReduce(a : (Long, Set[Long]), b : (Long, Set[Long])) = 166 | { 167 | (getNotMinus(a._1, b._1), a._2++b._2) 168 | } 169 | 170 | def conditionalStarHookingPreEnd(unconditional : Boolean, node : (Long, (Long, Set[Long]))) = 171 | { 172 | if(unconditional) 173 | { 174 | 175 | val a = node._2._2.filter(t => t != node._2._1) 176 | if(a.isEmpty) 177 | { 178 | (node._2._1, -1L) 179 | } else 180 | 181 | (node._2._1, a.min) 182 | } else 183 | { 184 | if(node._2._2.isEmpty) 185 | (node._2._1, -1L) 186 | else 187 | (node._2._1, node._2._2.min) 188 | } 189 | } 190 | 191 | def conditionalStarHookingPreEndReduce(a : Long, b : Long) = 192 | { 193 | if(a == -1) b 194 | else if(b == -1) a 195 | else 196 | Math.min(a, b) 197 | } 198 | 199 | def conditionalStartHooking(node : ( Long, ((Set[Long], Long, Boolean, Set[Long]), Option[Long] ))) = // neighbour, min, star, child, minReceivedChild 200 | { 201 | var outputList : ListBuffer[( Long, (Set[Long], Long, Set[Long]) )] = new ListBuffer //min, otherMin 202 | 203 | if(node._2._1._3 && node._1 == node._2._1._2 && node._2._2.isDefined && node._2._2.get != -1 && node._2._2.get < node._2._1._2) // if star and root 204 | {
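// this node is a star root and a strictly smaller candidate parent arrived from the pre-processing join: re-hook the root under that candidate and tell the candidate to record this node as a child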
205 | outputList.prepend((node._1, (node._2._1._1, node._2._2.get, node._2._1._4))) 206 | outputList.prepend((node._2._2.get, (Set(), -1, Set(node._1)))) 207 | } else 208 | { 209 | outputList.prepend((node._1, (node._2._1._1, node._2._1._2, node._2._1._4))) 210 | } 211 | 212 | outputList 213 | } 214 | 215 | def conditionalStartHookingReduce(a : (Set[Long], Long, Set[Long]) , b : (Set[Long], Long, Set[Long]) ) = 216 | { 217 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 ++ b._3) 218 | } 219 | 220 | def unconditionalStartHooking(node : ( Long, ((Set[Long], Long, Boolean, Set[Long]), Option[Long] ))) = // neighbour, min, star, minReceivedChild 221 | { 222 | var outputList : ListBuffer[( Long, (Set[Long], Long, Set[Long]) )] = new ListBuffer //min, otherMin 223 | 224 | if(node._2._1._3 && node._1 == node._2._1._2 && node._2._2.isDefined && node._2._2.get != -1) // if star and root 225 | { 226 | outputList.prepend((node._1, (node._2._1._1, node._2._2.get, node._2._1._4))) 227 | outputList.prepend((node._2._2.get, (Set(), -1, Set(node._1)))) 228 | } else 229 | { 230 | outputList.prepend((node._1, (node._2._1._1, node._2._1._2, node._2._1._4))) 231 | } 232 | 233 | outputList 234 | } 235 | 236 | def pointerJumping(node : ( Long, ((Set[Long], Long, Set[Long])))) = 237 | { 238 | var outputList : ListBuffer[( Long, (Set[Long], Long, Long) )] = new ListBuffer 239 | 240 | val it = node._2._3.iterator // child iterator 241 | 242 | while(it.hasNext) 243 | { 244 | val next = it.next 245 | 246 | outputList.prepend((next, (Set(), node._2._2, -1))) 247 | } 248 | 249 | if(node._1 == node._2._2) 250 | { 251 | outputList.prepend((node._1, (node._2._1, node._2._2, node._2._2))) 252 | } else 253 | { 254 | outputList.prepend((node._1, (node._2._1, -1, node._2._2))) 255 | } 256 | 257 | outputList 258 | } 259 | 260 | def pointerJumpingReduce(a : (Set[Long], Long, Long), b : (Set[Long], Long, Long)) = 261 | { 262 | (a._1 ++ b._1, Math.max(a._2, b._2), Math.max(a._3, b._3)) 263 | } 264 | 265 | def rebuildChild(node : ( Long, (Set[Long], Long, Long) )) = 266 | { 267 | var outputList : ListBuffer[( Long, (Set[Long], Long, Set[Long]) )] = new ListBuffer 268 | 269 | outputList.prepend((node._2._2, (Set(), -1, Set(node._1)))) 270 | outputList.prepend((node._1, (node._2._1, node._2._2, Set()))) 271 | 272 | outputList 273 | } 274 | 275 | def rebuildChildReduce(a : (Set[Long], Long, Set[Long]), b : (Set[Long], Long, Set[Long])) = 276 | { 277 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 ++ b._3) 278 | } 279 | 280 | def iteration( util : CCUtil, 281 | graph : RDD[(Long, (Set[Long], Long, Set[Long]))], 282 | printStat : Boolean, 283 | reduceInputSizeAccumulator : Accumulator[Long], 284 | reduceInputMessageNumberAccumulator : Accumulator[Long]) = 285 | { 286 | val timeStepStart = System.currentTimeMillis() 287 | 288 | val rule3 = graph .flatMap( item => starDetectionRule1( item ) ) 289 | .reduceByKey(starDetectionReduce1) 290 | .flatMap( item => starDetectionRule1End2Start( item ) ) 291 | .reduceByKey(starDetectionReduce2) 292 | .flatMap( item => starDetectionRule3( item ) ) 293 | .reduceByKey(starDetectionReduce3) 294 | .cache 295 | val preProcessing = rule3.map(t => (t._1, (t._2._1, t._2._2, t._2._3)))//.filter(node => node._1 != node._2._2) 296 | .flatMap( item => conditionalStartHookingPre( item ) ) 297 | .reduceByKey(conditionalStarHookingPreReduce) 298 | .map( item => conditionalStarHookingPreEnd(false, item ) ) 299 | .reduceByKey(conditionalStarHookingPreEndReduce) 300 | 301 | val starHooking =
rule3.leftOuterJoin(preProcessing).flatMap(item => conditionalStartHooking(item)) 302 | .reduceByKey(conditionalStartHookingReduce) 303 | .cache 304 | 305 | val ruleAfterStarHooking = starHooking .flatMap( item => starDetectionRule1( item ) ) 306 | .reduceByKey(starDetectionReduce1) 307 | .flatMap( item => starDetectionRule1End2Start( item ) ) 308 | .reduceByKey(starDetectionReduce2) 309 | .flatMap( item => starDetectionRule3( item ) ) 310 | .reduceByKey(starDetectionReduce3) 311 | .cache 312 | 313 | val preProcessing2 = ruleAfterStarHooking.map(t => (t._1, (t._2._1, t._2._2, t._2._3)))//.filter(node => node._1 != node._2._2) 314 | .flatMap( item => conditionalStartHookingPre( item ) ) 315 | .reduceByKey(conditionalStarHookingPreReduce) 316 | .map( item => conditionalStarHookingPreEnd(true, item ) ) 317 | .reduceByKey(conditionalStarHookingPreEndReduce) 318 | 319 | val unconditionalStarHooking = ruleAfterStarHooking.leftOuterJoin(preProcessing2).flatMap(item => unconditionalStartHooking(item)) 320 | .reduceByKey(conditionalStartHookingReduce).cache 321 | 322 | val pointerJumpingResult = unconditionalStarHooking.flatMap(item => pointerJumping(item)).reduceByKey(pointerJumpingReduce) 323 | 324 | 325 | val restart = pointerJumpingResult.flatMap(rebuildChild).reduceByKey(rebuildChildReduce).cache 326 | val termination = pointerJumpingResult.filter(t => t._2._2 != t._2._3).count 327 | 328 | 329 | val timeStepEnd = System.currentTimeMillis() 330 | util.io.printStat(termination, "termination") 331 | util.io.printStat(timeStepEnd - timeStepStart, "timeIteration") 332 | 333 | 334 | (restart, termination) 335 | } 336 | 337 | def main( args : Array[String] ) : Unit = 338 | { 339 | val timeBegin = System.currentTimeMillis() 340 | 341 | val property = new CCProperties("SGC", args(0)).load.getImmutable 342 | 343 | val util = new CCUtil(property) 344 | val spark = util.getSparkContext() 345 | 346 | val timeSparkLoaded = System.currentTimeMillis() 347 | val file = spark.textFile( property.dataset , property.sparkPartition) 348 | 349 | util.io.printFileStart(property.appName) 350 | 351 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 352 | 353 | var ret = fusedData.map( item => ( item._1, item._2.toSet) ) 354 | 355 | val timeDataLoaded = System.currentTimeMillis() 356 | 357 | var control = false; 358 | 359 | 360 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 361 | val reduceInputSizeAccumulator = spark.accumulator(0L) 362 | 363 | val previous = ret 364 | var retMap = ret.flatMap( item => forestInitializationStart( item ) ) 365 | 366 | retMap = retMap.reduceByKey( forestInitializationReduceStart ).cache 367 | retMap.count 368 | 369 | var forestOut = retMap.flatMap( item => forestInitializationEnd( item ) ) 370 | .reduceByKey(forestInitializationEndReduce) 371 | 372 | var (graph, termination) = iteration(util, forestOut, property.printMessageStat, reduceInputMessageNumberAccumulator, reduceInputSizeAccumulator) 373 | 374 | var step = 2 + 14 375 | 376 | while(termination != 0) 377 | { 378 | val (graph2, termination2) = iteration(util, graph, property.printMessageStat, reduceInputMessageNumberAccumulator, reduceInputSizeAccumulator) 379 | graph = graph2 380 | termination = termination2 381 | step = step + 14 382 | } 383 | 384 | val timeEnd = System.currentTimeMillis() 385 | 386 | util.testEnded( graph.map(t => (t._2._2, 1)).reduceByKey{case (a,b) => a+b}, 387 | step, 388 | timeBegin, 389 | timeEnd, 390 | timeSparkLoaded, 391 | timeDataLoaded, 392 | 
reduceInputMessageNumberAccumulator.value, 393 | reduceInputSizeAccumulator.value) 394 | } 395 | 396 | } 397 | --------------------------------------------------------------------------------