├── run ├── dataset_example └── config_example ├── .gitignore ├── src ├── cracker │ ├── CrackerMessageSize.scala │ ├── CrackerMainJava.java │ ├── CrackerMessagePropagation.scala │ ├── CrackerMessageRedPhase.scala │ ├── CrackerMessageIdentification.scala │ ├── CrackerMessageTree.scala │ ├── CrackerStats.scala │ ├── CrackerMain.scala │ └── CrackerAlgorithm.scala ├── ccf │ ├── CcfMessage.scala │ ├── CcfMainJava.java │ └── CcfMain.scala ├── ccmr │ ├── CcmrMainJava.java │ ├── CcmrMessage.scala │ └── CcmrMain.scala ├── sgc │ ├── SGCMainJava.java │ ├── SGCMessage.scala │ └── SGCMain.scala ├── hashMin │ ├── HashMinMainJava.java │ ├── HashMinMessage.scala │ └── HashMinMain.scala ├── hashToMin │ ├── HashToMinMainJava.java │ ├── HashToMinMessage.scala │ └── HashToMinMain.scala ├── alternating │ ├── AlternatingMainJava.java │ ├── AlternatingStats.scala │ ├── AlternatingMain.scala │ └── AlternatingAlgorithm.scala ├── crackerAllOptimizations │ ├── CrackerMainJava.java │ └── CrackerAllOptimizationsMain.scala ├── alternatingOptimized │ ├── AlternatingOptimizedMainJava.java │ ├── AlternatingMessage.scala │ └── AlternatingOptimizedMain.scala └── util │ ├── Main.java │ ├── CCPropertiesImmutable.scala │ ├── CCProperties.scala │ ├── CCUtilIO.scala │ └── CCUtil.scala ├── LICENSE ├── LICENSE.txt ├── README.md └── pom.xml /run/dataset_example: -------------------------------------------------------------------------------- 1 | 1 2 2 | 1 3 3 | 2 4 4 | 3 5 5 | 4 6 6 | 7 8 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | /bin/ 3 | .classpath 4 | .project 5 | .settings 6 | .idea 7 | cracker.iml 8 | -------------------------------------------------------------------------------- /src/cracker/CrackerMessageSize.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | trait CrackerMessageSize { 5 | def getMessageSize : Long 6 | } -------------------------------------------------------------------------------- /src/ccf/CcfMessage.scala: -------------------------------------------------------------------------------- 1 | package ccf 2 | 3 | @serializable 4 | class CcfMessage (val cc: Set[Int], val terminate : Boolean) 5 | { 6 | def voteToHalt : Boolean = terminate 7 | } -------------------------------------------------------------------------------- /src/ccf/CcfMainJava.java: -------------------------------------------------------------------------------- 1 | package ccf; 2 | 3 | public class CcfMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | CcfMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/ccmr/CcmrMainJava.java: -------------------------------------------------------------------------------- 1 | package ccmr; 2 | 3 | public class CcmrMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | CcmrMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/sgc/SGCMainJava.java: -------------------------------------------------------------------------------- 1 | package sgc; 2 | 3 | public class SGCMainJava 4 | { 5 | public static void main(final String[] args_) 6 | { 7 | SGCMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/cracker/CrackerMainJava.java: 
-------------------------------------------------------------------------------- 1 | package cracker; 2 | 3 | public class CrackerMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | CrackerTreeMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/hashMin/HashMinMainJava.java: -------------------------------------------------------------------------------- 1 | package hashMin; 2 | 3 | public class HashMinMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | HashMinMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/hashToMin/HashToMinMainJava.java: -------------------------------------------------------------------------------- 1 | package hashToMin; 2 | 3 | public class HashToMinMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | HashToMinMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/alternating/AlternatingMainJava.java: -------------------------------------------------------------------------------- 1 | package alternating; 2 | 3 | public class AlternatingMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | AlternatingMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/cracker/CrackerMessagePropagation.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | class CrackerTreeMessagePropagation (val min : Long, val child : Set[Long]) extends CrackerMessageSize 5 | { 6 | def getMessageSize = child.size + 1 7 | } -------------------------------------------------------------------------------- /src/ccmr/CcmrMessage.scala: -------------------------------------------------------------------------------- 1 | package ccmr 2 | 3 | import scala.collection.immutable.TreeSet 4 | 5 | @serializable 6 | class CcmrMessage (val cc: TreeSet[Long], val iterationNeeded : Boolean) 7 | { 8 | def voteToHalt : Boolean = !iterationNeeded 9 | } -------------------------------------------------------------------------------- /src/crackerAllOptimizations/CrackerMainJava.java: -------------------------------------------------------------------------------- 1 | package crackerAllOptimizations; 2 | 3 | public class CrackerMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | CrackerAllOptimizationsMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/sgc/SGCMessage.scala: -------------------------------------------------------------------------------- 1 | package sgc 2 | 3 | import scala.collection.immutable.TreeSet 4 | 5 | @serializable 6 | class HashToMinMessage (val min: Long, val cc: Set[Long], val sizeBefore : Long) 7 | { 8 | def voteToHalt : Boolean = sizeBefore == cc.size 9 | } -------------------------------------------------------------------------------- /src/alternatingOptimized/AlternatingOptimizedMainJava.java: -------------------------------------------------------------------------------- 1 | package alternatingOptimized; 2 | 3 | public class AlternatingOptimizedMainJava 4 | { 5 | public static void main(String[] args_) 6 | { 7 | AlternatingOptimizedMain.main(args_); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/hashMin/HashMinMessage.scala: 
-------------------------------------------------------------------------------- 1 | package hashMin 2 | 3 | import scala.collection.immutable.TreeSet 4 | 5 | @serializable 6 | class HashMinMessage (val min: Long, val minBefore : Long, val neigh : Set[Long]) 7 | { 8 | def voteToHalt : Boolean = minBefore == min 9 | } -------------------------------------------------------------------------------- /src/hashToMin/HashToMinMessage.scala: -------------------------------------------------------------------------------- 1 | package hashToMin 2 | 3 | import scala.collection.immutable.TreeSet 4 | 5 | @serializable 6 | class HashToMinMessage (val min: Long, val cc: Set[Long], val sizeBefore : Long) 7 | { 8 | def voteToHalt : Boolean = sizeBefore == cc.size 9 | } -------------------------------------------------------------------------------- /src/alternatingOptimized/AlternatingMessage.scala: -------------------------------------------------------------------------------- 1 | package alternatingOptimized 2 | 3 | @serializable 4 | class AlternatingMessage (val root : Boolean) 5 | { 6 | val isMarkedAsRootNode = root 7 | } 8 | 9 | object AlternatingMessage 10 | { 11 | val empty = new AlternatingMessage(false) 12 | } -------------------------------------------------------------------------------- /run/config_example: -------------------------------------------------------------------------------- 1 | 2 | dataset run/dataset_example 3 | outputFile run/output 4 | printAll true 5 | 6 | edgelistSeparator \t 7 | 8 | jarPath cracker-0.0.1-SNAPSHOT.jar 9 | 10 | sparkPartition 2 11 | #sparkMaster spark://:7077 12 | sparkMaster local[2] 13 | printMessageStat false 14 | -------------------------------------------------------------------------------- /src/cracker/CrackerMessageRedPhase.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | class CrackerTreeMessageRedPhase (val first : Option[CrackerTreeMessageIdentification], val second : Option[CrackerTreeMessageTree]) extends CrackerMessageSize 5 | { 6 | def getMessageSize = first.getOrElse(CrackerTreeMessageIdentification.empty).getMessageSize + second.getOrElse(CrackerTreeMessageTree.empty).getMessageSize 7 | } 8 | 9 | object CrackerTreeMessageRedPhase 10 | { 11 | def apply(first : CrackerTreeMessageIdentification) = new CrackerTreeMessageRedPhase(Option.apply(first), Option.empty) 12 | def apply(second : CrackerTreeMessageTree) = new CrackerTreeMessageRedPhase(Option.empty, Option.apply(second)) 13 | } -------------------------------------------------------------------------------- /src/cracker/CrackerMessageIdentification.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | class CrackerTreeMessageIdentification (val min: Long, val neigh: Set[Long]) extends CrackerMessageSize 5 | { 6 | def voteToHalt = neigh.isEmpty 7 | 8 | def getMessageSize = neigh.size + 1 9 | 10 | def merge(other : Option[CrackerTreeMessageIdentification]) : Option[CrackerTreeMessageIdentification] = 11 | { 12 | if(other.isDefined) 13 | { 14 | Option.apply(new CrackerTreeMessageIdentification(Math.min(min, other.get.min), neigh ++ other.get.neigh)) 15 | } else 16 | { 17 | Option.apply(CrackerTreeMessageIdentification.this) 18 | } 19 | } 20 | 21 | override def toString = neigh.toString 22 | } 23 | 24 | object CrackerTreeMessageIdentification 25 | { 26 | def empty = new CrackerTreeMessageIdentification(-1, Set()) 27 | } 
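A small, hedged illustration of the merge semantics defined above (the vertex ids are made up, not taken from the original sources): merging two CrackerTreeMessageIdentification values keeps the smaller minimum label and the union of the two neighbour sets, so getMessageSize grows with the merged neighbourhood.

    // hedged sketch of CrackerTreeMessageIdentification.merge (inside package cracker)
    val a = new CrackerTreeMessageIdentification(1L, Set(2L, 3L))
    val b = new CrackerTreeMessageIdentification(3L, Set(3L, 5L))
    val merged = a.merge(Option(b)).get
    // merged.min == 1, merged.neigh == Set(2, 3, 5), merged.getMessageSize == 4
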
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | http://opensource.org/licenses/mit-license.php 3 | 4 | Copyright (c) 2015 Thibault Debatty 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | http://opensource.org/licenses/mit-license.php 3 | 4 | Copyright (c) 2015 Thibault Debatty 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /src/cracker/CrackerMessageTree.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | @serializable 4 | class CrackerTreeMessageTree (val parent : Long, val child : Set[Long]) extends CrackerMessageSize 5 | { 6 | def getMessageSize = child.size + 1 7 | 8 | def merge(other : Option[CrackerTreeMessageTree]) : Option[CrackerTreeMessageTree] = 9 | { 10 | if(other.isDefined) 11 | { 12 | var parentNew = parent 13 | 14 | if(parentNew == -1) 15 | { 16 | parentNew = other.get.parent 17 | } 18 | 19 | Option.apply(new CrackerTreeMessageTree(parentNew, child ++ other.get.child)) 20 | } else 21 | { 22 | Option.apply(CrackerTreeMessageTree.this) 23 | } 24 | } 25 | 26 | def merge(other : CrackerTreeMessageTree) : CrackerTreeMessageTree = 27 | { 28 | var parentNew = parent 29 | 30 | if(parentNew == -1) 31 | { 32 | parentNew = other.parent 33 | } 34 | 35 | new CrackerTreeMessageTree(parentNew, child ++ other.child) 36 | } 37 | 38 | def getMessagePropagation(id : Long) = 39 | { 40 | if(parent == -1) 41 | { 42 | new CrackerTreeMessagePropagation(id, child) 43 | } else 44 | { 45 | new CrackerTreeMessagePropagation(-1, child) 46 | } 47 | } 48 | } 49 | 50 | object CrackerTreeMessageTree 51 | { 52 | def empty = new CrackerTreeMessageTree(-1, Set()) 53 | } -------------------------------------------------------------------------------- /src/util/Main.java: -------------------------------------------------------------------------------- 1 | package util; 2 | 3 | 4 | public class Main 5 | { 6 | public static void main(String[] args_) 7 | { 8 | if(args_.length > 1) 9 | { 10 | String algorithmName = args_[0]; 11 | String[] argsParsed = new String[args_.length - 1]; 12 | 13 | System.arraycopy( args_, 1, argsParsed, 0, args_.length - 1 ); 14 | 15 | switch(algorithmName) 16 | { 17 | case "CRACKER" : 18 | { 19 | // cracker.CrackerMainJava.main(argsParsed); 20 | crackerAllOptimizations.CrackerMainJava.main(argsParsed); 21 | break; 22 | } 23 | case "CRACKERALL" : 24 | { 25 | crackerAllOptimizations.CrackerMainJava.main(argsParsed); 26 | break; 27 | } 28 | case "CCF" : 29 | { 30 | ccf.CcfMainJava.main(argsParsed); 31 | break; 32 | } 33 | case "CCMR" : 34 | { 35 | ccmr.CcmrMainJava.main(argsParsed); 36 | break; 37 | } 38 | case "PEGASUS" : 39 | { 40 | hashMin.HashMinMainJava.main(argsParsed); 41 | break; 42 | } 43 | case "HASHTOMIN" : 44 | { 45 | hashToMin.HashToMinMainJava.main(argsParsed); 46 | break; 47 | } 48 | case "ALTERNATINGOPTIMIZED" : 49 | { 50 | alternatingOptimized.AlternatingOptimizedMainJava.main(argsParsed); 51 | break; 52 | } 53 | default : 54 | { 55 | System.out.println("ERROR: Algorithm name not recognized"); 56 | break; 57 | } 58 | } 59 | 60 | } else 61 | { 62 | System.out.println("ERROR Command input must be: command algorithmName configFile"); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/alternating/AlternatingStats.scala: -------------------------------------------------------------------------------- 1 | package alternating 2 | 3 | import util.CCPropertiesImmutable 4 | import org.apache.spark.SparkContext._ 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.rdd.RDD 7 | import util.CCUtil 8 | import cracker.CrackerStats 9 | 10 | @serializable 11 | class AlternatingStats(property : CCPropertiesImmutable, util : CCUtil, spark : SparkContext) { 12 | 13 | // val crackerStats = 
new CrackerStats(property, util, spark) 14 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 15 | val reduceInputSizeAccumulator = spark.accumulator(0L) 16 | 17 | def printSimplificationAlternating(step : Int, rdd : RDD[(Long, Set[Long])]) = 18 | { 19 | if (property.printMessageStat) 20 | { 21 | util.printSimplification(step, rdd.count, rdd.map(t=>t._2.size.toLong).reduce{case(a,b)=>a+b}, rdd.map(t=>t._2.size).max) 22 | } 23 | // if(property.printAll) 24 | // { 25 | // printGraph(util, step, "INPUT_BLUE", rdd) 26 | // } 27 | } 28 | 29 | def countMessage(ret : RDD[(Long, Set[Long])], step : Int) = 30 | { 31 | if (property.printMessageStat) { 32 | val previousMessageSize = reduceInputSizeAccumulator.value 33 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 34 | 35 | ret.foreach(t => reduceInputSizeAccumulator += t._2.size + 1) 36 | reduceInputMessageNumberAccumulator += ret.count 37 | 38 | util.printMessageStep(step, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 39 | } 40 | } 41 | } -------------------------------------------------------------------------------- /src/util/CCPropertiesImmutable.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | @serializable 4 | class CCPropertiesImmutable(algorithmNameFromConfig : String, 5 | val dataset : String, 6 | val dataset2 : String, 7 | val outputFile : String, 8 | val outputFileCC : String, 9 | val jarPath : String, 10 | val sparkMaster : String, 11 | val sparkPartition : Int, 12 | val sparkExecutorMemory : String, 13 | val sparkBlockManagerSlaveTimeoutMs : String, 14 | val sparkCoresMax : Int, 15 | val sparkShuffleManager : String, 16 | val sparkCompressionCodec : String, 17 | val sparkShuffleConsolidateFiles : String, 18 | val sparkAkkaFrameSize : String, 19 | val sparkDriverMaxResultSize : String, 20 | val sparkExecutorInstances : Int, 21 | val separator : String, 22 | val separatorCC : String, 23 | val printMessageStat : Boolean, 24 | val printLargestCC : Boolean, 25 | val printCC : Boolean, 26 | val printCCDistribution : Boolean, 27 | val printAll : Boolean, 28 | val customColumnValue : String, 29 | val switchLocal : Int, 30 | val switchLocalActive : Boolean, 31 | val vertexIdMultiplier : Int, 32 | val vertexNumber : Int, 33 | val loadBalancing : Boolean, 34 | val selfFunction : String, 35 | val cadidateFunction : String, 36 | val selfStar : Boolean, 37 | val transmitPreviousNeighbours : Boolean, 38 | val edgeThreshold : Double, 39 | val coreThreshold : Int, 40 | val invert : Boolean) extends Serializable 41 | { 42 | val algorithmName = if(loadBalancing) algorithmNameFromConfig+"_LOAD" else algorithmNameFromConfig 43 | val appName = algorithmName+"_"+dataset 44 | val allStat = printMessageStat && appName.contains("CRA") 45 | val filenameLargestCC = dataset+"_largestCC" 46 | } -------------------------------------------------------------------------------- /src/cracker/CrackerStats.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | import org.apache.spark.SparkContext._ 4 | import org.apache.spark.SparkContext 5 | import org.apache.spark.rdd.RDD 6 | import util.CCPropertiesImmutable 7 | import util.CCUtil 8 | 9 | @serializable 10 | class CrackerStats(property: CCPropertiesImmutable, util: CCUtil, spark: SparkContext) { 11 | 12 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 13 | val 
reduceInputSizeAccumulator = spark.accumulator(0L) 14 | 15 | def printSimplification(step: Int, rdd: RDD[(Long, CrackerTreeMessageIdentification)]) = { 16 | if (property.printMessageStat) { 17 | if (rdd.count > 0) 18 | util.printSimplification(step, rdd.count, rdd.map(t => t._2.neigh.size.toLong).sum, rdd.map(t => t._2.neigh.size).max) 19 | else 20 | util.printSimplification(step, 0, 0, 0) 21 | } 22 | } 23 | 24 | def printSimplificationCCF(step: Int, rdd: RDD[(Long, Iterable[Long])]) = { 25 | if (property.printMessageStat) { 26 | val count = rdd.count 27 | if (count > 0) 28 | util.printSimplification(step, count, rdd.map(t => t._2.size.toLong).sum, rdd.map(t => t._2.size).max) 29 | else 30 | util.printSimplification(step, 0, 0, 0) 31 | } 32 | // if(property.printAll) 33 | // { 34 | // printGraph(util, step, "INPUT_BLUE", rdd) 35 | // } 36 | } 37 | 38 | def printMessageStats[A <% CrackerMessageSize](step: Int, rdd: RDD[(Long, A)]) = { 39 | if (property.printMessageStat) { 40 | val previousMessageSize = reduceInputSizeAccumulator.value 41 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 42 | 43 | rdd.foreach(t => reduceInputSizeAccumulator += t._2.getMessageSize) 44 | reduceInputMessageNumberAccumulator += rdd.count 45 | 46 | util.printMessageStep(step, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 47 | } 48 | } 49 | 50 | def printGraph(util: CCUtil, step: Int, description: String, g: RDD[(Long, CrackerTreeMessageIdentification)]) = { 51 | util.io.printToFile("graph.txt", "STEP " + step + "\t[" + description + "]\t" + g.map(t => "{" + t._1 + " " + t._2.toString + "} ").reduce { case (a, b) => a + b } + "\n") 52 | } 53 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Cracker 2 | ======= 3 | 4 | Crumbling large graphs into connected components 5 | 6 | Abstract—Finding connected components is a fundamental task in applications dealing with graph analytics, such as social network 7 | analysis, web graph mining and image processing. The exponentially growing size of today’s graphs has required the definition of new 8 | computational models and algorithms for their efficient processing on highly distributed architectures. In this paper we present 9 | CRACKER, an efficient iterative MapReduce-like algorithm to detect connected components in large graphs. The strategy of CRACKER 10 | is to transform the input graph in a set of trees, one for each connected component in the graph. Nodes are iteratively removed from 11 | the graph and added to the trees, reducing the amount of computation at each iteration. We prove the correctness of the algorithm, 12 | evaluate its computational cost and provide an extensive experimental evaluation considering a wide variety of synthetic and real-world 13 | graphs. The experimental results show that CRACKER consistently outperforms state-of-the-art approaches both in terms of total 14 | computation time and volume of messages exchanged. 15 | 16 | 17 | ### Publications 18 | 19 | **2016 - IEEE Transaction on Parallel and Distributed Systems** 20 | 21 | Lulli, Alessandro, et al. 22 | **Fast Connected Components Computation in Large Graphs by Vertex Pruning.** 23 | IEEE Transactions on parallel and distributed systems (2016) (to appear). 
24 | 25 | @article{lulli2016fast, 26 | title={Fast Connected Components Computation in Large Graphs by Vertex Pruning}, 27 | author={Lulli, Alessandro and Carlini, Emanuele and Dazzi, Patrizio and Lucchese, Claudio and Ricci, Laura}, 28 | journal={IEEE Transactions on parallel and distributed systems}, 29 | year={2016}, 30 | publisher={IEEE} 31 | } 32 | 33 | **2015 - IEEE Symposium on Computers and Communication (ISCC)** 34 | 35 | Lulli, Alessandro, et al. 36 | **Cracker: Crumbling large graphs into connected components.** 37 | 2015 IEEE Symposium on Computers and Communication (ISCC). IEEE, 2015. 38 | 39 | @inproceedings{lulli2015cracker, 40 | title={Cracker: Crumbling large graphs into connected components}, 41 | author={Lulli, Alessandro and Ricci, Laura and Carlini, Emanuele and Dazzi, Patrizio and Lucchese, Claudio}, 42 | booktitle={2015 IEEE Symposium on Computers and Communication (ISCC)}, 43 | pages={574--581}, 44 | year={2015}, 45 | organization={IEEE} 46 | } 47 | 48 | ### How to build 49 | 50 | mvn clean package 51 | 52 | ### How to run 53 | 54 | spark-submit --class util.Main --executor-cores <#core> --driver-memory <#memory>g --master spark://:7077 target/cracker-0.0.1-SNAPSHOT.jar CRACKER config_example 55 | 56 | -------------------------------------------------------------------------------- /src/alternating/AlternatingMain.scala: -------------------------------------------------------------------------------- 1 | package alternating 2 | 3 | import java.io.FileWriter 4 | import scala.collection.immutable.TreeSet 5 | import scala.collection.mutable.ListBuffer 6 | import org.apache.spark.Accumulator 7 | import org.apache.spark.SparkContext._ 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.rdd.RDD 10 | import util.CCUtil 11 | import util.CCProperties 12 | import cracker.CrackerStats 13 | 14 | object AlternatingMain { 15 | 16 | 17 | def main(args : Array[String]) : Unit = 18 | { 19 | val timeBegin = System.currentTimeMillis() 20 | 21 | val property = new CCProperties("ALTERNATING", args(0)).load.getImmutable 22 | 23 | val util = new CCUtil(property) 24 | 25 | val spark = util.getSparkContext() 26 | val alternating = new AlternatingAlgorithm 27 | val stats = new AlternatingStats(property, util, spark) 28 | 29 | val timeSparkLoaded = System.currentTimeMillis() 30 | val file = spark.textFile( property.dataset , property.sparkPartition) 31 | 32 | util.io.printFileStart(property.appName) 33 | 34 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 35 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 36 | 37 | var ret = fusedData.flatMap(alternating.generateInitialEdge).reduceByKey(alternating.reduceMessageByKey).cache //.map( item => ( item._1, new CcfMessage( toTreeSet(item._2.toSet), false) ) ) 38 | ret.count 39 | 40 | val timeDataLoaded = System.currentTimeMillis() 41 | 42 | var control = false; 43 | var step = 0 44 | 45 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 46 | val reduceInputSizeAccumulator = spark.accumulator(0L) 47 | 48 | var previousRDDForConvergence = ret.map(t => (t._1, Math.min(t._2.min, t._1))).cache 49 | previousRDDForConvergence.count 50 | 51 | while (!control) { 52 | val timeStepStart = System.currentTimeMillis() 53 | 54 | stats.printSimplificationAlternating(step, ret) 55 | ret = ret.flatMap(item => alternating.largeStarMap(item)) 56 | 57 | stats.countMessage(ret, step) 58 | 59 | ret = ret.reduceByKey(alternating.reduceMessageByKey).flatMap(alternating.largeStarReduce) 60 | 61 | 
stats.countMessage(ret, step) 62 | 63 | ret = ret.reduceByKey(alternating.reduceMessageByKey).cache 64 | 65 | val timeStepLarge = System.currentTimeMillis() 66 | util.io.printTime(timeStepStart, timeStepLarge, "large") 67 | util.printTimeStep(step, timeStepLarge-timeStepStart) 68 | stats.printSimplificationAlternating(step+1, ret) 69 | 70 | ret = ret.flatMap(alternating.smallStarReduce) 71 | 72 | stats.countMessage(ret, step) 73 | 74 | ret = ret.reduceByKey(alternating.reduceMessageByKey).cache 75 | 76 | val rddForConvergence = ret.map(t => (t._1, Math.min(t._2.min, t._1))).cache 77 | control = previousRDDForConvergence.leftOuterJoin(rddForConvergence).map(t => if(t._2._2.isDefined) t._2._1 == t._2._2.get else false).cache.reduce{case(a,b) => a&&b} 78 | previousRDDForConvergence = rddForConvergence 79 | 80 | val timeStepSmall = System.currentTimeMillis() 81 | 82 | step = step + 3 83 | util.io.printTime(timeStepLarge, timeStepSmall, "small") 84 | util.printTimeStep(step+1, timeStepSmall-timeStepLarge) 85 | } 86 | 87 | val timeEnd = System.currentTimeMillis() 88 | 89 | util.testEnded( ret.map(t=> (t._2.min, 1)).reduceByKey{case (a,b)=> a+b}.map(t=>(t._1, t._2)), 90 | step, 91 | timeBegin, 92 | timeEnd, 93 | timeSparkLoaded, 94 | timeDataLoaded, 95 | reduceInputMessageNumberAccumulator.value, 96 | reduceInputSizeAccumulator.value) 97 | 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/hashToMin/HashToMinMain.scala: -------------------------------------------------------------------------------- 1 | package hashToMin 2 | 3 | import java.io.FileWriter 4 | import scala.collection.mutable.ListBuffer 5 | import org.apache.spark.SparkContext._ 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.rdd.RDD 8 | import util.CCUtil 9 | import util.CCProperties 10 | import util.CCProperties 11 | 12 | object HashToMinMain { 13 | def emitBlue(item: (Long, HashToMinMessage)): Iterable[(Long, HashToMinMessage)] = { 14 | var outputList: ListBuffer[(Long, HashToMinMessage)] = new ListBuffer 15 | 16 | val min = item._2.min 17 | 18 | val it = item._2.cc.iterator 19 | 20 | if (min == item._1) { 21 | outputList.prepend((item._1, new HashToMinMessage(min, item._2.cc, item._2.cc.size))) 22 | } else { 23 | outputList.prepend((item._1, new HashToMinMessage(min, Set(min), item._2.cc.size))) 24 | } 25 | 26 | while (it.hasNext) { 27 | val next = it.next 28 | 29 | if (next != item._1) { 30 | if (next == min) { 31 | outputList.prepend((next, new HashToMinMessage(min, item._2.cc, -1))) 32 | } else { 33 | outputList.prepend((next, new HashToMinMessage(min, Set(min), -1))) 34 | } 35 | } 36 | } 37 | 38 | outputList.toIterable 39 | } 40 | 41 | def reduceBlue(item1: HashToMinMessage, item2: HashToMinMessage): HashToMinMessage = { 42 | val ret = item1.cc ++ item2.cc 43 | val min = Math.min(item1.min, item2.min) 44 | var size = item1.sizeBefore 45 | if (size == -1) size = item2.sizeBefore 46 | new HashToMinMessage(min, ret, size) 47 | } 48 | 49 | def main(args: Array[String]): Unit = { 50 | val timeBegin = System.currentTimeMillis() 51 | 52 | val property = new CCProperties("HASHTOMIN", args(0)).load.getImmutable 53 | 54 | val util = new CCUtil(property) 55 | val spark = util.getSparkContext() 56 | 57 | val timeSparkLoaded = System.currentTimeMillis() 58 | val file = spark.textFile(property.dataset, property.sparkPartition) 59 | 60 | util.io.printFileStart(property.appName) 61 | 62 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 63 | val 
(parsedData, fusedData) = util.loadEdgeFromFile(file) 64 | 65 | var ret = fusedData.map(item => (item._1, new HashToMinMessage(item._2.toSet.min, item._2.toSet, -1))) 66 | 67 | val timeDataLoaded = System.currentTimeMillis() 68 | 69 | var control = false; 70 | var step = 0 71 | 72 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 73 | val reduceInputSizeAccumulator = spark.accumulator(0L) 74 | 75 | while (!control) { 76 | val timeStepStart = System.currentTimeMillis() 77 | 78 | val previous = ret 79 | val retMap = ret.flatMap(item => emitBlue(item)) 80 | 81 | if (property.printMessageStat) { 82 | val previousMessageSize = reduceInputSizeAccumulator.value 83 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 84 | 85 | retMap.foreach(t => reduceInputSizeAccumulator += t._2.cc.size + 2) 86 | reduceInputMessageNumberAccumulator += retMap.count 87 | 88 | util.printMessageStep(step + 1, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 89 | } 90 | 91 | ret = retMap.reduceByKey(reduceBlue).cache 92 | ret.foreach(x => {}) 93 | 94 | val controlMap = ret.map(t => t._2.voteToHalt) 95 | control = controlMap.reduce { case (a, b) => a && b } 96 | // try 97 | // { 98 | // control = controlMap.reduce{case (a,b) => a && b} 99 | // } 100 | // catch 101 | // { 102 | // case e : Exception => control = false 103 | // } 104 | 105 | val timeStepBlue = System.currentTimeMillis() 106 | 107 | step = step + 1 108 | util.io.printTime(timeStepStart, timeStepBlue, "blue") 109 | util.printTimeStep(step, timeStepBlue - timeStepStart) 110 | 111 | ret.checkpoint 112 | previous.unpersist(false) 113 | retMap.unpersist(false) 114 | controlMap.unpersist(false) 115 | } 116 | 117 | val timeEnd = System.currentTimeMillis() 118 | 119 | util.testEnded(ret.map(t => (t._2.min, t._2.cc.size)).reduceByKey { case (a, b) => Math.max(a, b) }, 120 | step, 121 | timeBegin, 122 | timeEnd, 123 | timeSparkLoaded, 124 | timeDataLoaded, 125 | reduceInputMessageNumberAccumulator.value, 126 | reduceInputSizeAccumulator.value) 127 | } 128 | 129 | } -------------------------------------------------------------------------------- /src/hashMin/HashMinMain.scala: -------------------------------------------------------------------------------- 1 | package hashMin 2 | 3 | import java.io.FileWriter 4 | import scala.collection.mutable.ListBuffer 5 | import org.apache.spark.SparkContext._ 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.rdd.RDD 8 | import util.CCUtil 9 | import util.CCProperties 10 | 11 | object HashMinMain 12 | { 13 | def emitBlue( item : ( Long, HashMinMessage ) ) : Iterable[( Long, HashMinMessage )] = 14 | { 15 | var outputList : ListBuffer[( Long, HashMinMessage )] = new ListBuffer 16 | 17 | val min = item._2.min 18 | 19 | val it = item._2.neigh.iterator 20 | 21 | outputList.prepend( ( item._1, new HashMinMessage( min, min, item._2.neigh) ) ) 22 | 23 | while(it.hasNext) 24 | { 25 | val next = it.next 26 | 27 | if(next != item._1) 28 | { 29 | outputList.prepend( ( next, new HashMinMessage( min, -1 , Set()) ) ) 30 | } 31 | } 32 | 33 | outputList.toIterable 34 | } 35 | 36 | def reduceBlue( item1 : HashMinMessage, item2 : HashMinMessage ) : HashMinMessage = 37 | { 38 | val ret = item1.neigh ++ item2.neigh 39 | val min = Math.min( item1.min, item2.min ) 40 | var minBefore = item1.minBefore 41 | if(minBefore == -1) minBefore = item2.minBefore 42 | new HashMinMessage( min, minBefore , ret ) 43 | } 44 | 45 | def 
main( args : Array[String] ) : Unit = 46 | { 47 | val timeBegin = System.currentTimeMillis() 48 | 49 | val property = new CCProperties("PEGASUS", args(0)).load.getImmutable 50 | 51 | val util = new CCUtil(property) 52 | val spark = util.getSparkContext() 53 | 54 | val timeSparkLoaded = System.currentTimeMillis() 55 | val file = spark.textFile( property.dataset , property.sparkPartition) 56 | 57 | util.io.printFileStart(property.appName) 58 | 59 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 60 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 61 | var ret = fusedData.map( item => ( item._1, new HashMinMessage( item._2.toSet.min, -1, item._2.toSet) ) ) 62 | 63 | val timeDataLoaded = System.currentTimeMillis() 64 | 65 | var control = false; 66 | var step = 0 67 | 68 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 69 | val reduceInputSizeAccumulator = spark.accumulator(0L) 70 | 71 | while ( !control ) { 72 | val timeStepStart = System.currentTimeMillis() 73 | 74 | val previous = ret 75 | val mapResult = ret.flatMap( item => emitBlue( item ) ) 76 | 77 | if(property.printMessageStat) 78 | { 79 | val previousMessageSize = reduceInputSizeAccumulator.value 80 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 81 | 82 | mapResult.foreach(t => reduceInputSizeAccumulator += t._2.neigh.size + 2) 83 | reduceInputMessageNumberAccumulator += mapResult.count 84 | 85 | util.printMessageStep(step + 1, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 86 | } 87 | 88 | ret = mapResult.reduceByKey( reduceBlue ).cache 89 | 90 | val controlMap = ret.map(t => t._2.voteToHalt) 91 | // val check = controlMap.filter(t=> (!t)).count 92 | // util.io.printStat(check, "active") 93 | control = controlMap.reduce{case (a,b) => a && b} 94 | 95 | val timeStepBlue = System.currentTimeMillis() 96 | 97 | step = step + 1 98 | util.io.printTime( timeStepStart, timeStepBlue, "blue" ) 99 | util.printTimeStep(step, timeStepBlue-timeStepStart) 100 | 101 | ret.checkpoint 102 | mapResult.unpersist(false) 103 | previous.unpersist(false) 104 | controlMap.unpersist(false) 105 | } 106 | 107 | val timeEnd = System.currentTimeMillis() 108 | 109 | util.testEnded( ret.map(t=> (t._2.min, 1)).reduceByKey{case (a,b)=> a+b}, 110 | step, 111 | timeBegin, 112 | timeEnd, 113 | timeSparkLoaded, 114 | timeDataLoaded, 115 | reduceInputMessageNumberAccumulator.value, 116 | reduceInputSizeAccumulator.value) 117 | } 118 | 119 | } -------------------------------------------------------------------------------- /src/util/CCProperties.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.util.Properties 4 | import java.io.InputStream 5 | import java.io.FileInputStream 6 | 7 | 8 | class CCProperties(algorithmName: String, configurationFile : String) extends Serializable 9 | { 10 | val property = new Properties 11 | 12 | def load() : CCProperties = 13 | { 14 | var input : InputStream = null 15 | 16 | input = new FileInputStream(configurationFile); 17 | 18 | property.load(input); 19 | 20 | this 21 | } 22 | 23 | def get(data : String, default : String) = 24 | { 25 | property.getProperty(data, default) 26 | } 27 | 28 | def getBoolean(data : String, default : Boolean) = 29 | { 30 | get(data, default.toString).toBoolean 31 | } 32 | 33 | def getInt(data : String, default : Int) = 34 | { 35 | get(data, default.toString).toInt 36 | } 37 | 38 | def getDouble(data 
: String, default : Double) = 39 | { 40 | get(data, default.toString).toDouble 41 | } 42 | 43 | def getImmutable : CCPropertiesImmutable = 44 | { 45 | val dataset = get("dataset", "") 46 | val dataset2 = get("dataset2", "") 47 | val jarPath = get("jarPath", "") 48 | val sparkMaster = get("sparkMaster", "local[2]") 49 | val sparkExecutorMemory = get("sparkExecutorMemory", "14g") 50 | val sparkPartition = get("sparkPartition", "32").toInt 51 | val sparkBlockManagerSlaveTimeoutMs= get("sparkBlockManagerSlaveTimeoutMs", "45000") 52 | val sparkCoresMax = get("sparkCoresMax", "-1").toInt 53 | val sparkAkkaFrameSize = get("sparkAkkaFrameSize", "100").toString 54 | val sparkShuffleManager = get("sparkShuffleManager", "SORT").toString 55 | val sparkCompressionCodec = get("sparkCompressionCodec", "snappy").toString 56 | val sparkShuffleConsolidateFiles = get("sparkShuffleConsolidateFiles", "false").toString 57 | val sparkDriverMaxResultSize = get("sparkDriverMaxResultSize", "1g").toString 58 | var separator = get("edgelistSeparator", "space") 59 | var separatorCC = get("edgelistSeparatorCC", "space") 60 | if(separator.equals("space")) separator = " " 61 | if(separatorCC.equals("space")) separatorCC = " " 62 | val printMessageStat = get("printMessageStat", "false").toBoolean 63 | val printLargestCC = get("printLargestCC", "false").toBoolean 64 | val printCC = get("printCC", "false").toBoolean 65 | val printCCDistribution = get("printCCDistribution", "false").toBoolean 66 | val printAll = get("printAll", "false").toBoolean 67 | val customColumnValue = get("customColumnValue", "") 68 | val algorithmNameFromConfiguration = get("algorithmName", algorithmName) 69 | val switchLocal = get("switchLocal", "0").toInt 70 | val switchLocalActive = switchLocal != -1 71 | val vertexIdMultiplier = get("vertexIdMultiplier", "-1").toInt 72 | val loadBalancing = get("loadBalancing", "false").toBoolean 73 | val vertexNumber = get("vertexNumber", "-1").toInt 74 | val outputFile = get("outputFile", "") 75 | val outputFileCC = get("outputFileCC", "") 76 | val coreThreshold = getInt("coreThreshold", 10) 77 | val invert = get("invert", "false").toBoolean 78 | 79 | //############# WITH YARN 80 | val sparkExecutorInstances = get("sparkExecutorInstances", "-1").toInt 81 | 82 | //################## DIAMETER 83 | val selfFunction = get("selfFunction", "DISTANCE_MAX") 84 | 85 | 86 | val candidateFunction = get("candidateFunction", "MAX") 87 | 88 | val selfStar = get("selfStar", "true").toBoolean 89 | val transmitPreviousNeighbours = get("transmitPreviousNeighbours", "true").toBoolean 90 | val edgeThreshold = getDouble("edgeThreshold", -1) 91 | 92 | new CCPropertiesImmutable( algorithmNameFromConfiguration, 93 | dataset, 94 | dataset2, 95 | outputFile, 96 | outputFileCC, 97 | jarPath, 98 | sparkMaster, 99 | sparkPartition, 100 | sparkExecutorMemory, 101 | sparkBlockManagerSlaveTimeoutMs, 102 | sparkCoresMax, 103 | sparkShuffleManager, 104 | sparkCompressionCodec, 105 | sparkShuffleConsolidateFiles, 106 | sparkAkkaFrameSize, 107 | sparkDriverMaxResultSize, 108 | sparkExecutorInstances, 109 | separator, 110 | separatorCC, 111 | printMessageStat, 112 | printLargestCC, 113 | printCC, 114 | printCCDistribution, 115 | printAll, 116 | customColumnValue, 117 | switchLocal, 118 | switchLocalActive, 119 | vertexIdMultiplier, 120 | vertexNumber, 121 | loadBalancing, 122 | selfFunction, 123 | candidateFunction, 124 | selfStar, 125 | transmitPreviousNeighbours, 126 | edgeThreshold, 127 | coreThreshold, 128 | invert) 129 | } 130 | } 
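For reference, this is how the loader above ties together with the run/config_example file shown earlier (a hedged sketch; the property names come from that config file and the remaining fields fall back to the defaults in getImmutable):

    // hedged sketch: load run/config_example exactly as the *Main objects do
    import util.CCProperties
    val property = new CCProperties("CRACKER", "run/config_example").load.getImmutable
    // property.dataset          == "run/dataset_example"
    // property.sparkMaster      == "local[2]"
    // property.sparkPartition   == 2
    // property.printMessageStat == false
    // property.appName          == "CRACKER_run/dataset_example"
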
-------------------------------------------------------------------------------- /src/ccf/CcfMain.scala: -------------------------------------------------------------------------------- 1 | package ccf 2 | 3 | import java.io.FileWriter 4 | import scala.collection.immutable.TreeSet 5 | import scala.collection.mutable.ListBuffer 6 | import org.apache.spark.Accumulator 7 | import org.apache.spark.SparkContext._ 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.rdd.RDD 10 | import util.CCUtil 11 | import util.CCProperties 12 | 13 | // TODO fail to recognize component of size = 1 14 | object CcfMain { 15 | def ccfIterateMap(item : (Long, Long)) : Iterable[(Long, Long)] = 16 | { 17 | var outputList : ListBuffer[(Long, Long)] = new ListBuffer 18 | 19 | outputList.prepend((item._1, item._2)) 20 | outputList.prepend((item._2, item._1)) 21 | 22 | outputList.toIterable 23 | } 24 | 25 | def ccfIterateReduce(item : (Long, Iterable[Long])) : Iterable[(Long, Long)] = 26 | { 27 | var terminate = true 28 | var outputList : ListBuffer[(Long, Long)] = new ListBuffer 29 | 30 | var min = item._1 31 | val it = item._2.iterator 32 | var valueList : List[Long] = List() 33 | 34 | while (it.hasNext) { 35 | val next = it.next 36 | valueList = next :: valueList 37 | if (next < min) { 38 | min = next 39 | } 40 | } 41 | 42 | if (min < item._1) { 43 | outputList.prepend((item._1, min)) 44 | val it2 = valueList.iterator 45 | while (it2.hasNext) { 46 | val next = it2.next 47 | if (min != next) { 48 | outputList.prepend((next, min)) 49 | terminate = false 50 | } 51 | } 52 | } 53 | 54 | if (!terminate) { 55 | // ack! ugly! 56 | outputList.prepend((-1, min)) 57 | } 58 | 59 | outputList.toIterable 60 | } 61 | 62 | def ccfDedupMap(item : (Long, Long)) : ((Long, Long), Long) = 63 | { 64 | ((item._1, item._2), -1) 65 | } 66 | 67 | def ccfDedupReduce(item : ((Long, Long), Iterable[Long])) : (Long, Long) = 68 | { 69 | (item._1._1, item._1._2) 70 | } 71 | 72 | def reduceBlue(item1 : CcfMessage, item2 : CcfMessage) : CcfMessage = 73 | { 74 | new CcfMessage(item1.cc ++ item2.cc, item1.terminate || item2.terminate) 75 | } 76 | 77 | def main(args : Array[String]) : Unit = 78 | { 79 | val timeBegin = System.currentTimeMillis() 80 | 81 | val property = new CCProperties("CCF", args(0)).load.getImmutable 82 | 83 | val util = new CCUtil(property) 84 | 85 | val spark = util.getSparkContext() 86 | 87 | val timeSparkLoaded = System.currentTimeMillis() 88 | val file = spark.textFile( property.dataset , property.sparkPartition) 89 | 90 | util.io.printFileStart(property.appName) 91 | 92 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 93 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 94 | 95 | var ret = parsedData //.map( item => ( item._1, new CcfMessage( toTreeSet(item._2.toSet), false) ) ) 96 | 97 | val timeDataLoaded = System.currentTimeMillis() 98 | 99 | var control = false; 100 | var step = 0 101 | 102 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 103 | val reduceInputSizeAccumulator = spark.accumulator(0L) 104 | 105 | while (!control) { 106 | val timeStepStart = System.currentTimeMillis() 107 | 108 | var tmp = ret.flatMap(item => ccfIterateMap(item)).groupByKey 109 | 110 | var previousMessageSize = 0L 111 | var previousMessageNumber = 0L 112 | 113 | if (property.printMessageStat) { 114 | previousMessageSize = reduceInputSizeAccumulator.value 115 | previousMessageNumber = reduceInputMessageNumberAccumulator.value 116 | 117 | tmp.foreach(t => reduceInputSizeAccumulator += 
t._2.size + 1) 118 | reduceInputMessageNumberAccumulator += tmp.count 119 | } 120 | 121 | ret = tmp.flatMap(ccfIterateReduce) 122 | 123 | control = ret.filter(t => t._1 == -1).count == 0 124 | 125 | ret = ret.filter(t => t._1 != -1) 126 | 127 | val tmp2 = ret.map(item => ccfDedupMap(item)).groupByKey 128 | 129 | if (property.printMessageStat) { 130 | tmp2.foreach(t => reduceInputSizeAccumulator += 3) 131 | reduceInputMessageNumberAccumulator += tmp2.count 132 | 133 | util.printMessageStep(step + 1, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 134 | } 135 | 136 | ret = tmp2.map(ccfDedupReduce) 137 | 138 | val timeStepBlue = System.currentTimeMillis() 139 | 140 | step = step + 1 141 | util.io.printTime(timeStepStart, timeStepBlue, "blue") 142 | util.printTimeStep(step, timeStepBlue-timeStepStart) 143 | } 144 | 145 | val timeEnd = System.currentTimeMillis() 146 | 147 | util.testEnded( ret.map(t=> (t._2, 1)).reduceByKey{case (a,b)=> a+b}.map(t=>(t._1, t._2 + 1)), 148 | step, 149 | timeBegin, 150 | timeEnd, 151 | timeSparkLoaded, 152 | timeDataLoaded, 153 | reduceInputMessageNumberAccumulator.value, 154 | reduceInputSizeAccumulator.value) 155 | 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/alternatingOptimized/AlternatingOptimizedMain.scala: -------------------------------------------------------------------------------- 1 | package alternatingOptimized 2 | 3 | import scala.collection.mutable.ListBuffer 4 | import org.apache.spark.Accumulator 5 | import org.apache.spark.SparkContext._ 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.rdd.RDD 8 | import cracker.CrackerStats 9 | import util.CCProperties 10 | import util.CCUtil 11 | import com.google.common.base.Joiner 12 | import java.io.FileWriter 13 | import alternating.AlternatingAlgorithm 14 | import alternating.AlternatingStats 15 | 16 | object AlternatingOptimizedMain { 17 | 18 | 19 | def main(args : Array[String]) : Unit = 20 | { 21 | val timeBegin = System.currentTimeMillis() 22 | 23 | val property = new CCProperties("ALTERNATINGOPTIMIZED", args(0)).load.getImmutable 24 | 25 | val util = new CCUtil(property) 26 | 27 | val spark = util.getSparkContext() 28 | val alternating = new AlternatingAlgorithm 29 | val stats = new AlternatingStats(property, util, spark) 30 | 31 | val timeSparkLoaded = System.currentTimeMillis() 32 | val file = spark.textFile( property.dataset , property.sparkPartition) 33 | 34 | util.io.printFileStart(property.appName) 35 | util.io.printStat(property.vertexIdMultiplier, "idMultiplier") 36 | 37 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 38 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 39 | 40 | // var ret = fusedData.flatMap(alternating.generateInitialEdge).reduceByKey(alternating.reduceMessageByKey).cache //.map( item => ( item._1, new CcfMessage( toTreeSet(item._2.toSet), false) ) ) 41 | var ret = fusedData.map(t => (t._1, t._2.toSet)).cache 42 | 43 | val timeDataLoaded = System.currentTimeMillis() 44 | ret.count 45 | 46 | var control = false; 47 | var step = 0 48 | 49 | var previousRDDForConvergence = ret.map(t => (t._1, Math.min(t._2.min, t._1))).cache 50 | previousRDDForConvergence.count 51 | 52 | while (!control) { 53 | val timeStepStart = System.currentTimeMillis() 54 | 55 | stats.printSimplificationAlternating(step, ret) 56 | var previousRet = ret 57 | ret = ret.flatMap(item => alternating.largeStarMapOptimized(item, 
property.vertexIdMultiplier)).cache 58 | 59 | ret.first() 60 | previousRet.unpersist() 61 | 62 | stats.countMessage(ret, step + 1) 63 | 64 | previousRet = ret 65 | ret = ret.reduceByKey(alternating.reduceMessageByKey).flatMap(item => alternating.largeStarReduceOptimized(item)) 66 | 67 | stats.countMessage(ret, step + 2) 68 | 69 | var previousRet2 = ret 70 | ret = ret.reduceByKey(alternating.reduceMessageByKey).cache 71 | 72 | ret.first() 73 | previousRet.unpersist() 74 | previousRet2.unpersist() 75 | 76 | val timeStepLarge = System.currentTimeMillis() 77 | util.io.printTime(timeStepStart, timeStepLarge, "large") 78 | util.printTimeStep(step, timeStepLarge-timeStepStart) 79 | stats.printSimplificationAlternating(step+1, ret) 80 | 81 | previousRet = ret 82 | ret = ret.flatMap(alternating.smallStarReduce) 83 | 84 | stats.countMessage(ret, step + 3) 85 | 86 | previousRet2 = ret 87 | ret = ret.reduceByKey(alternating.reduceMessageByKey).cache 88 | 89 | val rddForConvergence = ret.map(t => (t._1, Math.min(t._2.min, t._1))).cache 90 | control = rddForConvergence.leftOuterJoin(previousRDDForConvergence).map(t => if(t._2._2.isDefined) t._2._1 == t._2._2.get else false).cache.reduce{case(a,b) => a&&b} 91 | previousRDDForConvergence = rddForConvergence 92 | 93 | val timeStepSmall = System.currentTimeMillis() 94 | 95 | stats.printSimplificationAlternating(step + 3, ret) 96 | step = step + 3 97 | util.io.printTime(timeStepLarge, timeStepSmall, "small") 98 | util.printTimeStep(step+1, timeStepSmall-timeStepLarge) 99 | 100 | ret.first() 101 | previousRet.unpersist() 102 | previousRet2.unpersist() 103 | } 104 | 105 | val timeAdjustingAdditionalVertexForLoadBalancingStart = System.currentTimeMillis() 106 | 107 | val rddLabeled = ret.map(t=> (t._1, t._2.min)) 108 | val rddLabeledInverted = rddLabeled.map(t=> (t._2, t._1)) 109 | 110 | val resultJoin = rddLabeledInverted.leftOuterJoin(rddLabeled).map(t=>(t._2._1, t._2._2)).filter(t=>t._2.isDefined).map(t=>(t._1,t._2.get)) 111 | val result = rddLabeled.leftOuterJoin(resultJoin).map(t=> if(t._2._2.isDefined) (t._1, Math.min(t._2._1,t._2._2.get)) else (t._1, t._2._1)) 112 | 113 | val timeEnd = System.currentTimeMillis() 114 | util.io.printTime(timeAdjustingAdditionalVertexForLoadBalancingStart, timeEnd, "timeAdjustingAdditionalVertexForLoadBalancingStart") 115 | 116 | util.testEnded( 117 | result.filter(t => t._1%property.vertexIdMultiplier==0).groupByKey.map(t=> (t._2.min, 1)).reduceByKey{case (a,b)=> a+b}.map(t=>(t._1, t._2)), 118 | step, 119 | timeBegin, 120 | timeEnd, 121 | timeSparkLoaded, 122 | timeDataLoaded, 123 | stats.reduceInputMessageNumberAccumulator.value, 124 | stats.reduceInputSizeAccumulator.value) 125 | 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/alternating/AlternatingAlgorithm.scala: -------------------------------------------------------------------------------- 1 | package alternating 2 | 3 | import scala.collection.mutable.ListBuffer 4 | 5 | @serializable 6 | class AlternatingAlgorithm { 7 | def generateInitialEdge(item : (Long, Iterable[Long])) : Iterable[(Long, Set[Long])] = 8 | { 9 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 10 | 11 | val it = item._2.toSet.iterator 12 | while (it.hasNext) { 13 | val next = it.next 14 | outputList.prepend((item._1, Set(next))) 15 | } 16 | 17 | outputList.toIterable 18 | } 19 | 20 | def smallStarMap(item : (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 21 | { 22 | var outputList : ListBuffer[(Long, Set[Long])] = new 
ListBuffer 23 | 24 | val it2 = item._2.iterator 25 | while (it2.hasNext) { 26 | val next = it2.next 27 | if(next <= item._1) 28 | { 29 | outputList.prepend((item._1, Set(next))) 30 | } else 31 | { 32 | outputList.prepend((next, Set(item._1))) 33 | } 34 | } 35 | 36 | outputList.toIterable 37 | } 38 | 39 | def smallStarReduce(item : (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 40 | { 41 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 42 | 43 | var min = Math.min( item._1, item._2.min) 44 | val it2 = item._2.iterator 45 | // var valueList : Set[Long] = Set() 46 | // 47 | // while (it.hasNext) { 48 | // val next = it.next 49 | // valueList = valueList + next 50 | // if (next < min) { 51 | // min = next 52 | // } 53 | // } 54 | 55 | // val it2 = valueList.iterator 56 | while (it2.hasNext) { 57 | val next = it2.next 58 | outputList.prepend((next, Set(min))) 59 | } 60 | 61 | outputList.prepend((item._1, Set(min))) 62 | 63 | outputList.toIterable 64 | } 65 | 66 | def largeStarMapOptimized(item: (Long, Set[Long]), limit : Int) : Iterable[(Long, Set[Long])] = 67 | { 68 | val sizeNeighborhood = item._2.size 69 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 70 | 71 | // if(info.isDefined && info.get.isMarkedAsRootNode) 72 | // { 73 | // outputList.prepend((Option(item._2, item._1), Option.empty)) 74 | // } 75 | // else 76 | 77 | val it = item._2.iterator 78 | 79 | if(item._1 == item._2.min) 80 | { 81 | while(it.hasNext) 82 | { 83 | val next = it.next 84 | outputList.prepend((next, Set(item._1))) 85 | } 86 | } 87 | else if(sizeNeighborhood > limit && item._1 %limit==0) 88 | { 89 | while(it.hasNext) 90 | { 91 | val next = it.next 92 | val hash = item._1 + (next % (limit-1)) + 1 93 | outputList.prepend((item._1, Set(hash))) 94 | outputList.prepend((hash, Set(next))) 95 | } 96 | 97 | } 98 | else 99 | { 100 | while(it.hasNext) 101 | { 102 | val next = it.next 103 | outputList.prepend((item._1, Set(next))) 104 | outputList.prepend((next, Set(item._1))) 105 | } 106 | 107 | } 108 | 109 | outputList.toIterable 110 | } 111 | 112 | def reduceMessageByKey(a : Set[Long], b : Set[Long]) : Set[Long] = 113 | { 114 | a++b 115 | } 116 | 117 | def largeStarReduceOptimized(item: (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 118 | { 119 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 120 | 121 | var min = Math.min(item._1, item._2.min) 122 | val it2 = item._2.iterator 123 | var valueList : Set[Long] = Set() 124 | 125 | // while (it.hasNext) { 126 | // val next = it.next 127 | // valueList = valueList + next 128 | // if (next < min) { 129 | // min = next 130 | // } 131 | // } 132 | // 133 | // val it2 = valueList.iterator 134 | while (it2.hasNext) { 135 | val next = it2.next 136 | if (next > item._1) { 137 | outputList.prepend((next, Set(min))) 138 | } 139 | } 140 | 141 | outputList.prepend((item._1, Set(min))) 142 | 143 | // outputList.prepend((Option.empty, Option(item._1, new AlternatingMessage(item._1 == min)))) 144 | 145 | outputList.toIterable 146 | } 147 | 148 | def largeStarMap(item: (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 149 | { 150 | val sizeNeighborhood = item._2.toSet.size 151 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 152 | 153 | // if(info.isDefined && info.get.isMarkedAsRootNode) 154 | // { 155 | // outputList.prepend((Option(item._2, item._1), Option.empty)) 156 | // } 157 | // else 158 | 159 | val it = item._2.iterator 160 | 161 | while(it.hasNext) 162 | { 163 | val next = it.next 164 | 
outputList.prepend((item._1, Set(next))) 165 | outputList.prepend((next, Set(item._1))) 166 | } 167 | 168 | outputList.toIterable 169 | } 170 | 171 | def largeStarReduce(item : (Long, Set[Long])) : Iterable[(Long, Set[Long])] = 172 | { 173 | var outputList : ListBuffer[(Long, Set[Long])] = new ListBuffer 174 | 175 | var min = item._1 176 | val it = item._2.iterator 177 | var valueList : Set[Long] = Set() 178 | 179 | while (it.hasNext) { 180 | val next = it.next 181 | valueList = valueList + next 182 | if (next < min) { 183 | min = next 184 | } 185 | } 186 | 187 | val it2 = valueList.iterator 188 | while (it2.hasNext) { 189 | val next = it2.next 190 | if (next > item._1) { 191 | outputList.prepend((next, Set(min))) 192 | } 193 | } 194 | 195 | outputList.prepend((item._1, Set(min))) 196 | 197 | outputList.toIterable 198 | } 199 | } -------------------------------------------------------------------------------- /src/ccmr/CcmrMain.scala: -------------------------------------------------------------------------------- 1 | package ccmr 2 | 3 | import java.io.FileWriter 4 | import scala.collection.immutable.TreeSet 5 | import scala.collection.mutable.ListBuffer 6 | import org.apache.spark.Accumulator 7 | import org.apache.spark.SparkContext._ 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.rdd.RDD 10 | import util.CCUtil 11 | import util.CCProperties 12 | 13 | // not working for livejournal dataset 14 | object CcmrMain 15 | { 16 | def emitBlue( item : ( Long, CcmrMessage ) ) : Iterable[( Long, CcmrMessage )] = 17 | { 18 | var outputList : ListBuffer[( Long, CcmrMessage )] = new ListBuffer 19 | 20 | val vSource = item._1 21 | val it = item._2.cc.iterator 22 | 23 | var isLocalMaxState = false 24 | if(it.hasNext) 25 | { 26 | val vFirst = it.next 27 | 28 | if(vSource < vFirst) 29 | { 30 | isLocalMaxState = true 31 | outputList.prepend((vSource, new CcmrMessage(TreeSet(vFirst), false))) 32 | } 33 | 34 | var vDest = vFirst 35 | while(it.hasNext) 36 | { 37 | vDest = it.next 38 | if(isLocalMaxState) 39 | { 40 | outputList.prepend((vSource, new CcmrMessage(TreeSet(vDest), false))) 41 | } else 42 | { 43 | outputList.prepend((vFirst, new CcmrMessage(TreeSet(vDest), false))) 44 | outputList.prepend((vDest, new CcmrMessage(TreeSet(vFirst), false))) 45 | outputList.prepend((vSource, new CcmrMessage(TreeSet(), true))) 46 | } 47 | } 48 | if(vSource < vDest && !isLocalMaxState) 49 | { 50 | outputList.prepend((vSource, new CcmrMessage(TreeSet(vFirst), true))) 51 | } 52 | 53 | } else 54 | { 55 | //outputList.prepend((vSource, new CcmrMessage(TreeSet(), false))) 56 | } 57 | 58 | 59 | 60 | outputList.toIterable 61 | } 62 | 63 | def reduceBlue( item1 : CcmrMessage, item2 : CcmrMessage ) : CcmrMessage = 64 | { 65 | new CcmrMessage( item1.cc ++ item2.cc, item1.iterationNeeded || item2.iterationNeeded ) 66 | } 67 | 68 | def main( args : Array[String] ) : Unit = 69 | { 70 | val timeBegin = System.currentTimeMillis() 71 | 72 | val property = new CCProperties("CCMR", args(0)).load.getImmutable 73 | 74 | val util = new CCUtil(property) 75 | val spark = util.getSparkContext() 76 | 77 | val timeSparkLoaded = System.currentTimeMillis() 78 | val file = spark.textFile( property.dataset , property.sparkPartition) 79 | 80 | util.io.printFileStart(property.appName) 81 | 82 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 83 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 84 | 85 | def toTreeSet(data : Set[Long]) : TreeSet[Long] = 86 | { 87 | var toReturn : TreeSet[Long] = TreeSet() 
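// copy the unordered Set into an immutable TreeSet so that CcmrMessage neighbours are
// iterated in ascending vertex order (the loop below is equivalent to TreeSet[Long]() ++ data)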
88 | val it = data.iterator 89 | while (it.hasNext) 90 | { 91 | toReturn = toReturn + it.next 92 | } 93 | 94 | toReturn 95 | } 96 | 97 | var ret = fusedData.map( item => ( item._1, new CcmrMessage( toTreeSet(item._2.toSet), false) ) ) 98 | 99 | val timeDataLoaded = System.currentTimeMillis() 100 | // // ccmr not correctly handle isolated vertices, these must be removed before starting the algorithm 101 | // ret = ret.filter(t => !t._2.cc.isEmpty) 102 | 103 | var control = false; 104 | var step = 0 105 | 106 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 107 | val reduceInputSizeAccumulator = spark.accumulator(0L) 108 | 109 | while ( !control ) { 110 | val timeStepStart = System.currentTimeMillis() 111 | 112 | ret = ret.flatMap( item => emitBlue( item ) ) 113 | 114 | if(property.printMessageStat) 115 | { 116 | val previousMessageSize = reduceInputSizeAccumulator.value 117 | val previousMessageNumber = reduceInputMessageNumberAccumulator.value 118 | 119 | ret.foreach(t => reduceInputSizeAccumulator += t._2.cc.size + 1) 120 | reduceInputMessageNumberAccumulator += ret.count 121 | 122 | util.printMessageStep(step + 1, reduceInputMessageNumberAccumulator.value - previousMessageNumber, reduceInputSizeAccumulator.value - previousMessageSize) 123 | } 124 | 125 | ret = ret.reduceByKey( reduceBlue ).cache 126 | 127 | val controlMap = ret.map(t => t._2.voteToHalt) 128 | // val test = controlMap.filter(t=>(!t)).count 129 | // util.io.printStat(test, "active") 130 | control = controlMap.reduce{case (a,b) => a && b} 131 | 132 | val timeStepBlue = System.currentTimeMillis() 133 | 134 | step = step + 1 135 | util.io.printTime( timeStepStart, timeStepBlue, "blue" ) 136 | util.printTimeStep(step, timeStepBlue-timeStepStart) 137 | } 138 | 139 | val timeEnd = System.currentTimeMillis() 140 | 141 | 142 | util.testEnded( ret.filter(t => !t._2.cc.isEmpty).map(t => (t._1, t._2.cc.size + 1)), 143 | step, 144 | timeBegin, 145 | timeEnd, 146 | timeSparkLoaded, 147 | timeDataLoaded, 148 | reduceInputMessageNumberAccumulator.value, 149 | reduceInputSizeAccumulator.value) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/cracker/CrackerMain.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | import org.apache.spark.SparkContext._ 4 | import org.apache.spark.SparkContext 5 | import scala.collection.mutable.ListBuffer 6 | import java.io.FileWriter 7 | import org.apache.spark.rdd.RDD 8 | import util.CCUtil 9 | import util.CCUtil 10 | import util.CCUtil 11 | import util.CCProperties 12 | 13 | object CrackerTreeMain { 14 | 15 | def main(args : Array[String]) : Unit = 16 | { 17 | val timeBegin = System.currentTimeMillis() 18 | 19 | /* 20 | * additional properties: 21 | * crackerUseUnionInsteadOfJoin : true | false 22 | * crackerCoalescePartition : true | false 23 | */ 24 | 25 | val propertyLoad = new CCProperties("CRACKER_TREE_SPLIT", args(0)).load 26 | val crackerUseUnionInsteadOfJoin = propertyLoad.getBoolean("crackerUseUnionInsteadOfJoin", true) 27 | val crackerCoalescePartition = propertyLoad.getBoolean("crackerCoalescePartition", true) 28 | val crackerForceEvaluation = propertyLoad.getBoolean("crackerForceEvaluation", true) 29 | val crackerSkipPropagation = propertyLoad.getBoolean("crackerSkipPropagation", false) 30 | 31 | val property = propertyLoad.getImmutable 32 | val cracker = new CrackerAlgorithm(property) 33 | 34 | val util = new CCUtil(property) 35 | val spark = 
util.getSparkContext() 36 | val stats = new CrackerStats(property, util, spark) 37 | 38 | val timeSparkLoaded = System.currentTimeMillis() 39 | val file = spark.textFile(property.dataset, property.sparkPartition) 40 | 41 | util.io.printFileStart(property.appName) 42 | 43 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 44 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 45 | 46 | var ret = fusedData.map(item => (item._1, new CrackerTreeMessageIdentification((item._2.toSet + item._1).min, item._2.toSet))) 47 | 48 | val timeDataLoaded = System.currentTimeMillis() 49 | 50 | var control = false; 51 | var step = 0 52 | 53 | var treeRDD : Option[RDD[(Long, CrackerTreeMessageTree)]] = Option.empty 54 | 55 | // if not done, CC of size 1 are not recognized 56 | treeRDD = Option.apply(ret.map(t => (t._1, new CrackerTreeMessageTree(-1, Set())))) 57 | 58 | while (!control) { 59 | // simplification step 60 | val timeStepStart = System.currentTimeMillis() 61 | 62 | stats.printSimplification(step, ret) 63 | 64 | ret = ret.flatMap(item => cracker.emitBlue(item, false)) 65 | 66 | stats.printMessageStats(step + 1, ret) 67 | 68 | ret = ret.reduceByKey(cracker.reduceBlue).cache 69 | 70 | val active = ret.count 71 | control = active == 0 72 | 73 | val timeStepBlue = System.currentTimeMillis() 74 | util.printTimeStep(step + 1, timeStepBlue-timeStepStart) 75 | 76 | if (!control) { 77 | stats.printSimplification(step+1, ret) 78 | // reduction step 79 | val tmp = ret.flatMap(item => cracker.emitRed(item)) 80 | 81 | stats.printMessageStats(step + 2, tmp) 82 | 83 | val tmpReduced = tmp.reduceByKey(cracker.reduceRed) 84 | 85 | ret = tmpReduced.filter(t => t._2.first.isDefined).map(t => (t._1, t._2.first.get)) 86 | treeRDD = cracker.mergeTree(treeRDD, tmpReduced.filter(t => t._2.second.isDefined).map(t => (t._1, t._2.second.get)), crackerUseUnionInsteadOfJoin, crackerForceEvaluation) 87 | 88 | val timeStepEnd = System.currentTimeMillis() 89 | step = step + 2 90 | util.io.printTimeStep(timeStepStart, timeStepBlue, timeStepEnd) 91 | util.printTimeStep(step, timeStepEnd-timeStepBlue) 92 | } else { 93 | step = step + 1 94 | util.io.printTime(timeStepStart, timeStepBlue, "blue") 95 | } 96 | } 97 | 98 | stats.printSimplification(step, ret) 99 | 100 | if(!crackerSkipPropagation) 101 | { 102 | 103 | var treeRDDPropagationTmp = treeRDD.get 104 | 105 | if(crackerUseUnionInsteadOfJoin && crackerCoalescePartition) 106 | { 107 | val timeStepStart = System.currentTimeMillis() 108 | treeRDDPropagationTmp = treeRDDPropagationTmp.coalesce(property.sparkPartition) 109 | val timeStepBlue = System.currentTimeMillis() 110 | util.io.printTime(timeStepStart, timeStepBlue, "coalescing") 111 | } 112 | 113 | stats.printMessageStats(step, treeRDDPropagationTmp) 114 | 115 | var treeRDDPropagation = treeRDDPropagationTmp.reduceByKey(cracker.reducePrepareDataForPropagation).map(t => (t._1, t._2.getMessagePropagation(t._1))).cache 116 | 117 | control = false 118 | while (!control) { 119 | val timeStepStart = System.currentTimeMillis() 120 | treeRDDPropagation = treeRDDPropagation.flatMap(item => cracker.mapPropagate(item)) 121 | 122 | stats.printMessageStats(step + 1, treeRDDPropagation) 123 | 124 | treeRDDPropagation = treeRDDPropagation.reduceByKey(cracker.reducePropagate).cache 125 | control = treeRDDPropagation.map(t => t._2.min != -1).reduce { case (a, b) => a && b } 126 | 127 | step = step + 1 128 | val timeStepBlue = System.currentTimeMillis() 129 | util.io.printTime(timeStepStart, timeStepBlue, "propagation") 130 
| util.printTimeStep(step, timeStepBlue-timeStepStart) 131 | } 132 | 133 | val timeEnd = System.currentTimeMillis() 134 | 135 | util.testEnded(treeRDDPropagation.map(t => (t._2.min, 1)).reduceByKey { case (a, b) => a + b }, 136 | step, 137 | timeBegin, 138 | timeEnd, 139 | timeSparkLoaded, 140 | timeDataLoaded, 141 | stats.reduceInputMessageNumberAccumulator.value, 142 | stats.reduceInputSizeAccumulator.value, 143 | getBitmaskStat(crackerUseUnionInsteadOfJoin,crackerCoalescePartition,crackerForceEvaluation)) 144 | 145 | } else 146 | { 147 | val timeEnd = System.currentTimeMillis() 148 | val vertexNumber = fusedData.count 149 | 150 | util.testEnded(treeRDD.get.map(t => (1L, 1)).reduceByKey { case (a, b) => a + b }, 151 | step, 152 | timeBegin, 153 | timeEnd, 154 | timeSparkLoaded, 155 | timeDataLoaded, 156 | stats.reduceInputMessageNumberAccumulator.value + cracker.getMessageNumberForPropagation(step, vertexNumber), 157 | stats.reduceInputSizeAccumulator.value + cracker.getMessageSizeForPropagation(step, vertexNumber), 158 | getBitmaskStat(crackerUseUnionInsteadOfJoin,crackerCoalescePartition,crackerForceEvaluation)) 159 | } 160 | } 161 | 162 | def bool2int(b:Boolean) = if (b) 1 else 0 163 | 164 | def getBitmaskStat( crackerUseUnionInsteadOfJoin : Boolean, 165 | crackerCoalescePartition : Boolean, 166 | crackerForceEvaluation : Boolean) : String = 167 | { 168 | bool2int(crackerUseUnionInsteadOfJoin).toString+bool2int(crackerCoalescePartition).toString+bool2int(crackerForceEvaluation).toString 169 | } 170 | 171 | } -------------------------------------------------------------------------------- /src/util/CCUtilIO.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.io.FileWriter 4 | import java.text.DecimalFormat 5 | 6 | import org.apache.spark.SparkContext._ 7 | import org.apache.spark.SparkContext 8 | import org.apache.spark.rdd.RDD 9 | 10 | import com.google.common.base.Joiner 11 | 12 | class CCUtilIO(property : CCPropertiesImmutable) extends Serializable 13 | { 14 | val fileStatDescription = "algorithmName,dataset,partition,step,timeAll,timeLoadingAndComputation,timeComputation,reduceInputMessageNumber,reduceInputSize,ccNumber,ccNumberNoIsolatedVertices,ccMaxSize,customColumn" 15 | val fileSimplificationDescritpion = "dataset,step,activeVertices,activeVerticesNormalized" 16 | 17 | def printStat( data : Long, description : String ) = 18 | { 19 | val printFile = new FileWriter( "time.txt", true ) 20 | printFile.write( description + ": " + data + "\n" ) 21 | printFile.close 22 | } 23 | 24 | def printSimplification( step : Int, activeVertices : Long, initialVertices : Long ) = 25 | { 26 | val joiner = Joiner.on(",") 27 | 28 | val printFile = new FileWriter( "simplification.txt", true ) 29 | 30 | val token : Array[Object] = Array(property.dataset, step.toString, activeVertices.toString, ((((activeVertices.toDouble * 100) / initialVertices)*100).round.toDouble / 100).toString) 31 | printFile.write(joiner.join(token)+ "\n" ) 32 | 33 | printFile.close 34 | } 35 | 36 | def printSimplification( step : Int, activeVertices : Long, initialVertices : Long , activeEdges : Double, degreeMax : Int) = 37 | { 38 | val printFile = new FileWriter( "simplification.txt", true ) 39 | 40 | val token : Array[Object] = Array( property.dataset, 41 | step.toString, 42 | activeVertices.toString, 43 | ((((activeVertices.toDouble * 100) / initialVertices)*100).round.toDouble / 100).toString, 44 | property.algorithmName, 45 | activeEdges.toString, 
46 | (activeEdges / activeVertices).toString, 47 | degreeMax.toString) 48 | printFile.write(token.mkString(",")+ "\n" ) 49 | 50 | printFile.close 51 | } 52 | 53 | def printTimeStep( step : Int, time : Long) = 54 | { 55 | val joiner = Joiner.on(",") 56 | 57 | val printFile = new FileWriter( "timeStep.txt", true ) 58 | 59 | // dataset, algorithmName, step, time 60 | val token : Array[Object] = Array(property.dataset, property.algorithmName, step.toString, time.toString) 61 | printFile.write(joiner.join(token)+ "\n" ) 62 | 63 | printFile.close 64 | } 65 | 66 | def printMessageStep( step : Int, messageNumber : Long, messageSize : Long) = 67 | { 68 | val joiner = Joiner.on(",") 69 | 70 | val printFile = new FileWriter( "messageStep.txt", true ) 71 | 72 | val token : Array[Object] = Array(property.dataset, property.algorithmName, step.toString, messageNumber.toString, messageSize.toString) 73 | printFile.write(joiner.join(token)+ "\n" ) 74 | 75 | printFile.close 76 | } 77 | 78 | def printAllStat( algorithmName : String, 79 | dataset : String, 80 | partition : Int, 81 | step : Int, 82 | timaAll : Long, 83 | timeLoadingAndComputation : Long, 84 | timeComputation : Long, 85 | reduceInputMessageNumber : Long, 86 | reduceInputSize : Long, 87 | ccNumber : Long, 88 | ccNumberNoIsolatedVertices : Long, 89 | ccMaxSize : Int, 90 | customColumnValue : String) = 91 | { 92 | val printFile = new FileWriter( "stats.txt", true ) 93 | val joiner = Joiner.on(",") 94 | val token : Array[Object] = Array(algorithmName, dataset, partition.toString, step.toString, timaAll.toString, timeLoadingAndComputation.toString, timeComputation.toString, reduceInputMessageNumber.toString, reduceInputSize.toString, ccNumber.toString, ccNumberNoIsolatedVertices.toString, ccMaxSize.toString, customColumnValue) 95 | 96 | printFile.write(joiner.join(token)+ "\n" ) 97 | printFile.close 98 | } 99 | 100 | def printCCDistribution(rdd : RDD[(Long, Int)]) = 101 | { 102 | val printFile = new FileWriter( "distribution.txt", true ) 103 | val joiner = Joiner.on(",") 104 | 105 | val ccDistribution = rdd.map(t=>(t._2,1)).reduceByKey{case(a,b)=>a+b}.map(t=>t._1+","+t._2+"\n").reduce{case(a,b)=>a+b} 106 | 107 | // val token : Array[Object] = Array(algorithmName, dataset, partition.toString, hybridMessageSizeBound.toString, step.toString, timaAll.toString, timeLoadingAndComputation.toString, timeComputation.toString, reduceInputMessageNumber.toString, reduceInputSize.toString, ccNumber.toString, ccMaxSize.toString) 108 | // 109 | // printFile.write(joiner.join(token)+ "\n" ) 110 | printFile.write(ccDistribution+ "\n" ) 111 | 112 | printFile.close 113 | } 114 | 115 | def printEdgelist( data : RDD[(Long,Long)] ) = 116 | { 117 | val collected = data.collect.iterator 118 | val printFile = new FileWriter( "edgelist.txt", true ) 119 | while(collected.hasNext) 120 | { 121 | val next = collected.next 122 | printFile.write( next._1+" "+next._2 + "\n" ) 123 | } 124 | printFile.close 125 | } 126 | 127 | def printFileStart(description : String) = 128 | { 129 | val printFile = new FileWriter( "time.txt", true ) 130 | printFile.write("\n"+ description+": START\n" ) 131 | printFile.close 132 | } 133 | 134 | def printFileEnd(description : String) = 135 | { 136 | val printFile = new FileWriter( "time.txt", true ) 137 | printFile.write( description+": END\n" ) 138 | printFile.close 139 | } 140 | 141 | def printTime( start : Long, end : Long, description : String ) = 142 | { 143 | val printFile = new FileWriter( "time.txt", true ) 144 | printFile.write( description + 
": " + ( end - start ) + "\n" ) 145 | printFile.close 146 | } 147 | 148 | def printStep( step : Int ) = 149 | { 150 | val printFile = new FileWriter( "time.txt", true ) 151 | printFile.write( "step: "+ step + "\n" ) 152 | printFile.close 153 | } 154 | 155 | def printTimeStep( start : Long, red : Long, end : Long ) = 156 | { 157 | val printFile = new FileWriter( "time.txt", true ) 158 | printFile.write( "blue: " + ( red - start ) + " red: " + ( end - red ) + " all: " + ( end - start ) + "\n" ) 159 | printFile.close 160 | } 161 | 162 | def printToFile( file : String, data : String ) = 163 | { 164 | val printFile = new FileWriter( file, true ) 165 | printFile.write( data ) 166 | printFile.close 167 | } 168 | 169 | } -------------------------------------------------------------------------------- /src/util/CCUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import org.apache.spark.SparkContext._ 4 | import org.apache.spark.rdd.RDD 5 | import scala.collection.mutable.ListBuffer 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.SparkConf 8 | 9 | class CCUtil(property: CCPropertiesImmutable) extends Serializable { 10 | val io = new CCUtilIO(property) 11 | var vertexNumber = 0L 12 | 13 | def getSparkContext(): SparkContext = { 14 | val conf = new SparkConf() 15 | .setMaster(property.sparkMaster) 16 | .setAppName(property.appName) 17 | .set("spark.executor.memory", property.sparkExecutorMemory) 18 | .set("spark.storage.blockManagerSlaveTimeoutMs", property.sparkBlockManagerSlaveTimeoutMs) 19 | .setJars(Array(property.jarPath)) 20 | 21 | if (property.sparkCoresMax > 0) { 22 | conf.set("spark.cores.max", property.sparkCoresMax.toString) 23 | val executorCore = property.sparkCoresMax / property.sparkExecutorInstances 24 | conf.set("spark.executor.cores", executorCore.toString) 25 | } 26 | if (property.sparkExecutorInstances > 0) { 27 | conf.set("spark.executor.instances", property.sparkExecutorInstances.toString) 28 | } 29 | 30 | val spark = new SparkContext(conf) 31 | 32 | spark.setCheckpointDir(".") 33 | 34 | spark 35 | } 36 | 37 | // return edgelist and edge associated to each vertex 38 | def loadEdgeFromFile(data: RDD[String]): (RDD[(Long, Long)], RDD[(Long, Iterable[Long])]) = { 39 | val toReturnEdgeList = data.flatMap(line => { 40 | val splitted = line.split(property.separator) 41 | if (splitted.size >= 1) { 42 | try { 43 | Array((splitted(0).toLong, splitted(1).toLong), (splitted(1).toLong, splitted(0).toLong)) 44 | } catch { 45 | case e: Exception => Array[(Long, Long)]() 46 | } 47 | } else { 48 | Array[(Long, Long)]() 49 | } 50 | }) 51 | 52 | val toReturnVertex = toReturnEdgeList.distinct.groupByKey 53 | 54 | if (property.printMessageStat) { 55 | val edgeNumber = toReturnEdgeList.count / 2 56 | vertexNumber = toReturnVertex.count 57 | 58 | io.printStat(edgeNumber, "edgeNumber") 59 | io.printStat(vertexNumber, "vertexNumber") 60 | } 61 | 62 | (toReturnEdgeList, toReturnVertex) 63 | } 64 | 65 | // load from a file in the format of 66 | // vertexID, arcID 67 | def loadVertexEdgeFile(data: RDD[String]): (RDD[(Long, Long)], RDD[(Long, Iterable[Long])]) = { 68 | def mapToEdgeList(item: (String, Iterable[Long])): Iterable[(Long, Long)] = { 69 | var outputList: ListBuffer[(Long, Long)] = new ListBuffer 70 | 71 | val it = item._2.iterator 72 | 73 | while (it.hasNext) { 74 | val next = it.next 75 | val it2 = item._2.iterator 76 | 77 | while (it2.hasNext) { 78 | val next2 = it2.next 79 | 80 | if (next != next2) { 81 | 
outputList.prepend((next, next2)) 82 | } 83 | } 84 | } 85 | 86 | outputList.toIterable 87 | } 88 | 89 | val toReturnEdgeList = data.flatMap(line => { 90 | val splitted = line.split(",") 91 | if (splitted.size >= 1) { 92 | try { 93 | Array((splitted(1), splitted(0).toLong)) 94 | } catch { 95 | case e: Exception => Array[(String, Long)]() 96 | } 97 | } else { 98 | Array[(String, Long)]() 99 | } 100 | }) 101 | 102 | val edgeList = toReturnEdgeList.groupByKey.flatMap(mapToEdgeList) 103 | 104 | // io.printEdgelist(edgeList) 105 | 106 | val toReturnVertex = edgeList.groupByKey 107 | 108 | if (property.printMessageStat) { 109 | val edgeNumber = toReturnEdgeList.count 110 | val vertexNumber = toReturnVertex.count 111 | 112 | io.printStat(edgeNumber, "edgeNumber") 113 | io.printStat(vertexNumber, "vertexNumber") 114 | } 115 | 116 | (edgeList, toReturnVertex) 117 | } 118 | 119 | def getCCNumber(rdd: RDD[(Long, Int)]) = { 120 | rdd.count 121 | } 122 | 123 | def getCCNumberNoIsolatedVertices(rdd: RDD[(Long, Int)]) = { 124 | rdd.filter(t => t._2 != 1).count 125 | } 126 | 127 | def getCCMaxSize(rdd: RDD[(Long, Int)]) = { 128 | rdd.map(t => t._2).max 129 | } 130 | 131 | def printSimplification(step: Int, activeVertices: Long) = { 132 | io.printSimplification(step, activeVertices, vertexNumber) 133 | } 134 | 135 | def printSimplification(step: Int, activeVertices: Long, activeEdges: Double, degreeMax: Int) = { 136 | io.printSimplification(step, activeVertices, vertexNumber, activeEdges, degreeMax) 137 | } 138 | 139 | def printTimeStep(step: Int, time: Long) = { 140 | if (!property.printMessageStat) 141 | io.printTimeStep(step, time) 142 | } 143 | 144 | def printMessageStep(step: Int, messageNumber: Long, messageSize: Long) = { 145 | io.printMessageStep(step, messageNumber, messageSize) 146 | } 147 | 148 | def testEnded(rdd: RDD[(Long, Int)], step: Int, timeBegin: Long, timeEnd: Long, timeSparkLoaded: Long, timeDataLoaded: Long, reduceInputMessageNumber: Long, reduceInputSize: Long, bitmask: String = "", optimization: String = "") = { 149 | io.printTime(timeBegin, timeEnd, "all") 150 | io.printTime(timeSparkLoaded, timeEnd, "allComputationAndLoadingGraph") 151 | io.printTime(timeDataLoaded, timeEnd, "allComputation") 152 | io.printStep(step) 153 | io.printStat(reduceInputMessageNumber, "reduceInputMessageNumber") 154 | io.printStat(reduceInputSize, "reduceInputSize") 155 | io.printFileEnd(property.appName) 156 | 157 | io.printAllStat(property.algorithmName, 158 | property.dataset, 159 | property.sparkPartition, 160 | step, 161 | (timeEnd - timeBegin), 162 | (timeEnd - timeSparkLoaded), 163 | (timeEnd - timeDataLoaded), 164 | reduceInputMessageNumber, 165 | reduceInputSize, 166 | getCCNumber(rdd), 167 | getCCNumberNoIsolatedVertices(rdd), 168 | getCCMaxSize(rdd), 169 | property.customColumnValue) 170 | 171 | if (property.printCCDistribution) 172 | io.printCCDistribution(rdd) 173 | } 174 | 175 | /*def testEnded(ccNumber : Long, ccNumberNoIsolatedVertices : Long, step : Int, timeBegin : Long, timeEnd : Long, timeSparkLoaded : Long, timeDataLoaded : Long, reduceInputMessageNumber : Long, reduceInputSize : Long) = 176 | { 177 | io.printTime( timeBegin, timeEnd, "all" ) 178 | io.printTime( timeSparkLoaded, timeEnd, "allComputationAndLoadingGraph" ) 179 | io.printTime( timeDataLoaded, timeEnd, "allComputation" ) 180 | io.printStep( step ) 181 | io.printStat(reduceInputMessageNumber, "reduceInputMessageNumber") 182 | io.printStat(reduceInputSize, "reduceInputSize") 183 | io.printFileEnd(property.appName) 184 | 185 
| io.printAllStat( property.algorithmName, 186 | property.dataset, 187 | property.sparkPartition, 188 | step, 189 | (timeEnd - timeBegin), 190 | (timeEnd - timeSparkLoaded) , 191 | (timeEnd - timeDataLoaded), 192 | reduceInputMessageNumber, 193 | reduceInputSize, 194 | ccNumber, 195 | ccNumberNoIsolatedVertices, 196 | 0, 197 | property.customColumnValue) 198 | }*/ 199 | } 200 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | cracker 5 | cracker 6 | 0.0.1-SNAPSHOT 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | org.apache.spark 15 | spark-core_2.10 16 | 2.1.1 17 | 18 | 19 | org.scala-lang 20 | scala-library 21 | 2.10.6 22 | 23 | 24 | 25 | src 26 | 27 | 28 | 29 | net.alchim31.maven 30 | scala-maven-plugin 31 | 3.2.1 32 | 33 | 34 | org.apache.maven.plugins 35 | maven-compiler-plugin 36 | 2.0.2 37 | 38 | 1.7 39 | 1.7 40 | 41 | 42 | 43 | org.apache.maven.plugins 44 | maven-jar-plugin 45 | 2.2 46 | 47 | 48 | 49 | org.apache.maven.plugins 50 | maven-shade-plugin 51 | 1.5 52 | 53 | 54 | package 55 | 56 | shade 57 | 58 | 59 | true 60 | allinone 61 | 62 | 63 | *:* 64 | 65 | 66 | 67 | 69 | reference.conf 70 | 71 | 73 | 74 | akka.Main 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | org.apache.maven.plugins 108 | maven-assembly-plugin 109 | 2.2-beta-4 110 | 111 | 112 | jar-with-dependencies 113 | 114 | 115 | 116 | util.Main 117 | 118 | 119 | 120 | 121 | 122 | package 123 | 124 | single 125 | 126 | 127 | 128 | 129 | 130 | org.scala-tools 131 | maven-scala-plugin 132 | 133 | 134 | 135 | compile 136 | 137 | compile 138 | 139 | compile 140 | 141 | 142 | 143 | test-compile 144 | 145 | testCompile 146 | 147 | test-compile 148 | 149 | 150 | 151 | process-resources 152 | 153 | compile 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | net.alchim31.maven 171 | scala-maven-plugin 172 | 173 | 174 | scala-compile-first 175 | process-resources 176 | 177 | add-source 178 | compile 179 | 180 | 181 | 182 | scala-test-compile 183 | process-test-resources 184 | 185 | testCompile 186 | 187 | 188 | 189 | 190 | 191 | org.apache.maven.plugins 192 | maven-compiler-plugin 193 | 194 | 195 | compile 196 | 197 | compile 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | org.apache.maven.plugins 207 | maven-jar-plugin 208 | 209 | 210 | **/log4j.properties 211 | 212 | 213 | 214 | true 215 | util.Main 216 | dependency-jars/ 217 | 218 | 219 | 220 | 221 | 222 | 223 | maven-assembly-plugin 224 | 225 | 226 | package 227 | 228 | single 229 | 230 | 231 | 232 | 233 | 234 | jar-with-dependencies 235 | 236 | 237 | 238 | 239 | 240 | 241 | -------------------------------------------------------------------------------- /src/cracker/CrackerAlgorithm.scala: -------------------------------------------------------------------------------- 1 | package cracker 2 | 3 | import org.apache.spark.SparkContext._ 4 | import scala.collection.mutable.ListBuffer 5 | import org.apache.spark.rdd.RDD 6 | import org.apache.spark.SparkContext 7 | import java.io.FileWriter 8 | import util.CCPropertiesImmutable 9 | 10 | @serializable 11 | class CrackerAlgorithm(property : CCPropertiesImmutable) { 12 | def mapPropagate(item : (Long, CrackerTreeMessagePropagation)) : Iterable[(Long, CrackerTreeMessagePropagation)] = 13 | { 14 | var outputList : ListBuffer[(Long, 
CrackerTreeMessagePropagation)] = new ListBuffer 15 | if (item._2.min != -1) { 16 | outputList.prepend((item._1, new CrackerTreeMessagePropagation(item._2.min, Set()))) 17 | val it = item._2.child.iterator 18 | while (it.hasNext) { 19 | val next = it.next 20 | outputList.prepend((next, new CrackerTreeMessagePropagation(item._2.min, Set()))) 21 | } 22 | } else { 23 | outputList.prepend(item) 24 | } 25 | outputList 26 | } 27 | 28 | def reducePropagate(item1 : CrackerTreeMessagePropagation, item2 : CrackerTreeMessagePropagation) : CrackerTreeMessagePropagation = 29 | { 30 | var minEnd = item1.min 31 | if (minEnd == -1) minEnd = item2.min 32 | 33 | new CrackerTreeMessagePropagation(minEnd, item1.child ++ item2.child) 34 | } 35 | 36 | def emitBlue(item : (Long, CrackerTreeMessageIdentification), forceLoadBalancing : Boolean, edgePruning : Boolean = true) : Iterable[(Long, CrackerTreeMessageIdentification)] = 37 | { 38 | var outputList : ListBuffer[(Long, CrackerTreeMessageIdentification)] = new ListBuffer 39 | if (item._2.min == item._1 && (item._2.neigh.isEmpty || (item._2.neigh.size == 1 && item._2.neigh.contains(item._1)))) { 40 | // outputList.prepend( ( item._1, new CrackerTreeMessage( item._2.min, Set()) ) ) 41 | } else { 42 | 43 | val min = item._2.min 44 | 45 | if (item._2.neigh.isEmpty) { 46 | outputList.prepend((item._1, new CrackerTreeMessageIdentification(min, Set()))) 47 | } else { 48 | outputList.prepend((item._1, new CrackerTreeMessageIdentification(min, Set(min)))) 49 | } 50 | 51 | if (min < item._1 || !forceLoadBalancing || !edgePruning) { 52 | val it = item._2.neigh.iterator 53 | while (it.hasNext) { 54 | val next = it.next 55 | outputList.prepend((next, new CrackerTreeMessageIdentification(min, Set(min)))) 56 | } 57 | } 58 | } 59 | 60 | // val printFile = new FileWriter( "check.txt", true ) 61 | // 62 | // printFile.write("BLUE "+item._1+ "\n" ) 63 | // 64 | // printFile.close 65 | 66 | outputList.toIterable 67 | } 68 | 69 | def emitRed(item : (Long, CrackerTreeMessageIdentification)) : Iterable[(Long, CrackerTreeMessageRedPhase)] = { 70 | 71 | emitRed(item, false) 72 | } 73 | 74 | def emitRed(item : (Long, CrackerTreeMessageIdentification), forceLoadBalancing : Boolean, obliviousSeed : Boolean = true) : Iterable[(Long, CrackerTreeMessageRedPhase)] = { 75 | 76 | var outputList : ListBuffer[(Long, CrackerTreeMessageRedPhase)] = new ListBuffer 77 | 78 | val minset : Set[Long] = item._2.neigh 79 | if (minset.size > 1) { 80 | if(property.loadBalancing || forceLoadBalancing || obliviousSeed) 81 | { 82 | outputList.prepend((item._2.min, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageIdentification(item._2.min, Set(item._2.min))))) 83 | } 84 | else 85 | { 86 | outputList.prepend((item._2.min, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageIdentification(item._2.min, minset)))) 87 | } 88 | var it = minset.iterator 89 | while (it.hasNext) { 90 | val value : Long = it.next 91 | if (value != item._2.min) 92 | outputList.prepend((value, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageIdentification(item._2.min, Set(item._2.min))))) 93 | } 94 | } else if (minset.size == 1 && minset.contains(item._1)) { 95 | outputList.prepend((item._1, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageIdentification(item._1, Set())))) 96 | } 97 | 98 | if (!item._2.neigh.contains(item._1)) { 99 | outputList.prepend((item._2.min, CrackerTreeMessageRedPhase.apply(new CrackerTreeMessageTree(-1, Set(item._1))))) 100 | outputList.prepend((item._1, CrackerTreeMessageRedPhase.apply(new 
CrackerTreeMessageTree(item._2.min, Set())))) 101 | } 102 | 103 | // val printFile = new FileWriter( "check.txt", true ) 104 | // 105 | // printFile.write("RED "+item._1+ "\n" ) 106 | // 107 | // printFile.close 108 | 109 | outputList.toIterable 110 | } 111 | 112 | def reduceBlue(item1 : CrackerTreeMessageIdentification, item2 : CrackerTreeMessageIdentification) : CrackerTreeMessageIdentification = 113 | { 114 | val ret = item1.neigh ++ item2.neigh 115 | val min = Math.min(item1.min, item2.min) 116 | 117 | new CrackerTreeMessageIdentification(min, ret) 118 | } 119 | 120 | def mergeMessageIdentification(first : Option[CrackerTreeMessageIdentification], second : Option[CrackerTreeMessageIdentification]) : Option[CrackerTreeMessageIdentification] = 121 | { 122 | if (first.isDefined) { 123 | first.get.merge(second) 124 | } else { 125 | second 126 | } 127 | } 128 | 129 | def mergeMessageTree(first : Option[CrackerTreeMessageTree], second : Option[CrackerTreeMessageTree]) : Option[CrackerTreeMessageTree] = 130 | { 131 | if (first.isDefined) { 132 | first.get.merge(second) 133 | } else { 134 | second 135 | } 136 | } 137 | 138 | def reduceRed(item1 : CrackerTreeMessageRedPhase, item2 : CrackerTreeMessageRedPhase) : CrackerTreeMessageRedPhase = 139 | { 140 | new CrackerTreeMessageRedPhase(mergeMessageIdentification(item1.first, item2.first), mergeMessageTree(item1.second, item2.second)) 141 | } 142 | 143 | def mergeTree(start : Option[RDD[(Long, CrackerTreeMessageTree)]], add : RDD[(Long, CrackerTreeMessageTree)], crackerUseUnionInsteadOfJoin : Boolean, crackerForceEvaluation : Boolean) : Option[RDD[(Long, CrackerTreeMessageTree)]] = 144 | { 145 | if (start.isDefined) { 146 | if(crackerUseUnionInsteadOfJoin) 147 | { 148 | Option.apply(start.get.union(add)) 149 | } else 150 | { 151 | if(crackerForceEvaluation) 152 | { 153 | val treeUpdated = start.get.leftOuterJoin(add).map(t => (t._1, t._2._1.merge(t._2._2).get)) 154 | val forceEvaluation = treeUpdated.count 155 | Option.apply(treeUpdated) 156 | } else 157 | { 158 | Option.apply(start.get.leftOuterJoin(add).map(t => (t._1, t._2._1.merge(t._2._2).get))) 159 | } 160 | } 161 | } else { 162 | Option.apply(add) 163 | } 164 | } 165 | 166 | def mergeTree(spark : SparkContext, start : Option[RDD[(Long, CrackerTreeMessageTree)]], add : Array[(Long, CrackerTreeMessageTree)], crackerUseUnionInsteadOfJoin : Boolean, crackerForceEvaluation : Boolean) : Option[RDD[(Long, CrackerTreeMessageTree)]] = 167 | { 168 | if (start.isDefined) { 169 | if(crackerUseUnionInsteadOfJoin) 170 | { 171 | Option.apply(start.get.union(spark.parallelize(add))) 172 | } else 173 | { 174 | if(crackerForceEvaluation) 175 | { 176 | val treeUpdated = start.get.leftOuterJoin(spark.parallelize(add)).map(t => (t._1, t._2._1.merge(t._2._2).get)) 177 | val forceEvaluation = treeUpdated.count 178 | Option.apply(treeUpdated) 179 | } else 180 | { 181 | Option.apply(start.get.leftOuterJoin(spark.parallelize(add)).map(t => (t._1, t._2._1.merge(t._2._2).get))) 182 | } 183 | } 184 | } else { 185 | Option.apply(spark.parallelize(add)) 186 | } 187 | } 188 | 189 | def mergeTree(start : Option[Array[(Long, CrackerTreeMessageTree)]], add : Array[(Long, CrackerTreeMessageTree)]) : Option[Array[(Long, CrackerTreeMessageTree)]] = 190 | { 191 | if (start.isDefined) { 192 | Option.apply(start.get.union(add)) 193 | } else { 194 | Option.apply(add) 195 | } 196 | } 197 | 198 | def reducePrepareDataForPropagation(a : CrackerTreeMessageTree, b : CrackerTreeMessageTree) : CrackerTreeMessageTree = 199 | { 200 | var 
parent = a.parent 201 | if (parent == -1) parent = b.parent 202 | 203 | new CrackerTreeMessageTree(parent, a.child ++ b.child) 204 | } 205 | 206 | def getMessageNumberForPropagation(step : Int, vertexNumber : Long) = 207 | { 208 | val stepPropagation = (step - 1) / 2 209 | 210 | (vertexNumber * stepPropagation) + vertexNumber 211 | } 212 | 213 | def getMessageSizeForPropagation(step : Int, vertexNumber : Long) = 214 | { 215 | val stepPropagation = (step - 1) / 2 216 | 217 | ((vertexNumber * 2) * stepPropagation) - vertexNumber 218 | } 219 | } -------------------------------------------------------------------------------- /src/crackerAllOptimizations/CrackerAllOptimizationsMain.scala: -------------------------------------------------------------------------------- 1 | package crackerAllOptimizations 2 | 3 | import scala.Array.canBuildFrom 4 | import org.apache.spark.SparkContext 5 | import org.apache.spark.rdd.RDD 6 | import cracker._ 7 | import util.CCProperties 8 | import util.CCUtil 9 | import util.CCPropertiesImmutable 10 | import java.io.PrintWriter 11 | import java.io.File 12 | import java.io.FileWriter 13 | 14 | object CrackerAllOptimizationsMain { 15 | 16 | def printGraph(util: CCUtil, step: Int, description: String, g: RDD[(Long, CrackerTreeMessageIdentification)]) = { 17 | util.io.printToFile("graph.txt", "STEP " + step + "\t[" + description + "]\t" + g.map(t => "{" + t._1 + " " + t._2.toString + "} ").reduce { case (a, b) => a + b } + "\n") 18 | } 19 | 20 | def main(args: Array[String]): Unit = { 21 | val timeBegin = System.currentTimeMillis() 22 | /* 23 | * additional properties: 24 | * crackerUseUnionInsteadOfJoin : true | false 25 | * crackerCoalescePartition : true | false 26 | */ 27 | 28 | val propertyLoad = new CCProperties("CRACKER_ALL", args(0)).load 29 | val crackerUseUnionInsteadOfJoin = propertyLoad.getBoolean("crackerUseUnionInsteadOfJoin", true) 30 | val crackerCoalescePartition = propertyLoad.getBoolean("crackerCoalescePartition", true) 31 | val crackerForceEvaluation = propertyLoad.getBoolean("crackerForceEvaluation", true) 32 | val crackerSkipPropagation = propertyLoad.getBoolean("crackerSkipPropagation", false) 33 | 34 | val (edgePruning, obliviousSeed, fcs) = getOptimizations(propertyLoad.get("optimizations", "111")) 35 | 36 | val property = propertyLoad.getImmutable 37 | val cracker = new CrackerAlgorithm(property) 38 | 39 | val util = new CCUtil(property) 40 | val spark = util.getSparkContext() 41 | val stats = new CrackerStats(property, util, spark) 42 | 43 | val timeSparkLoaded = System.currentTimeMillis() 44 | val file = spark.textFile(property.dataset, property.sparkPartition) 45 | 46 | util.io.printFileStart(property.appName) 47 | 48 | // val (parsedData, fusedData) = util.loadVertexEdgeFile(file) 49 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 50 | 51 | var ret = fusedData.map(item => (item._1, new CrackerTreeMessageIdentification((item._2.toSet + item._1).min, item._2.toSet))) 52 | 53 | val timeDataLoaded = System.currentTimeMillis() 54 | 55 | var control = false; 56 | var step = 0 57 | 58 | var treeRDD: Option[RDD[(Long, CrackerTreeMessageTree)]] = Option.empty 59 | 60 | // if not done, CC of size 1 are not recognized 61 | treeRDD = Option.apply(ret.map(t => (t._1, new CrackerTreeMessageTree(-1, Set())))) 62 | 63 | // what did i do 3 years ago!?!?!? 
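// the heuristic below is undocumented; it appears to force the load-balanced variant of emitRed only on a few hand-picked steps (0, 2, 8, 16, 32), e.g. forceLoadBalancing(2) == true but forceLoadBalancing(3) == false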
64 | def forceLoadBalancing(step: Int): Boolean = { 65 | step == 0 || step == 2 || step == 8 || step == 16 || step == 32 66 | // step < 10 && step % 3 == 0 67 | } 68 | 69 | while (!control) { 70 | // simplification step 71 | val timeStepStart = System.currentTimeMillis() 72 | 73 | stats.printSimplification(step, ret) 74 | 75 | ret = ret.flatMap(item => cracker.emitBlue(item, true, edgePruning)) 76 | 77 | stats.printMessageStats(step + 1, ret) 78 | 79 | ret = ret.reduceByKey(cracker.reduceBlue).cache 80 | 81 | val active = ret.count 82 | // control = active == 0 83 | control = active <= property.switchLocal // set the number where to switch in local mode 84 | 85 | val timeStepBlue = System.currentTimeMillis() 86 | util.printTimeStep(step + 1, timeStepBlue - timeStepStart) 87 | 88 | if (!control) { 89 | stats.printSimplification(step + 1, ret) 90 | // reduction step 91 | val check = step 92 | val tmp = ret.flatMap(item => cracker.emitRed(item, forceLoadBalancing(check), obliviousSeed)) 93 | if (forceLoadBalancing(check)) { 94 | util.io.printStat(check, "loadBalancing triggered") 95 | } 96 | 97 | stats.printMessageStats(step + 2, tmp) 98 | 99 | val tmpReduced = tmp.reduceByKey(cracker.reduceRed) 100 | 101 | ret = tmpReduced.filter(t => t._2.first.isDefined).map(t => (t._1, t._2.first.get)) 102 | treeRDD = cracker.mergeTree(treeRDD, tmpReduced.filter(t => t._2.second.isDefined).map(t => (t._1, t._2.second.get)), crackerUseUnionInsteadOfJoin, crackerForceEvaluation) 103 | 104 | val timeStepEnd = System.currentTimeMillis() 105 | step = step + 2 106 | util.io.printTimeStep(timeStepStart, timeStepBlue, timeStepEnd) 107 | util.printTimeStep(step, timeStepEnd - timeStepBlue) 108 | } else { 109 | step = step + 1 110 | util.io.printTime(timeStepStart, timeStepBlue, "blue") 111 | } 112 | } 113 | 114 | stats.printSimplification(step, ret) 115 | 116 | if (fcs) // run local 117 | { 118 | val timeLocalStart = System.currentTimeMillis() 119 | var retCollected = ret.collect 120 | 121 | control = false 122 | var localStep = 0 123 | 124 | while (!control) { 125 | // simpli 126 | val tmp = retCollected.flatMap(item => cracker.emitRed(item)) 127 | 128 | val tmpReduced = tmp.groupBy(t => t._1).toArray.map { case (group, traversable) => (group, traversable.map(t => t._2).reduce(cracker.reduceRed)) } 129 | 130 | retCollected = tmpReduced.filter(t => t._2.first.isDefined).map(t => (t._1, t._2.first.get)) 131 | treeRDD = cracker.mergeTree(spark, treeRDD, tmpReduced.filter(t => t._2.second.isDefined).map(t => (t._1, t._2.second.get)), crackerUseUnionInsteadOfJoin, crackerForceEvaluation) 132 | 133 | // blue step 134 | retCollected = retCollected.flatMap(item => cracker.emitBlue(item, false)) 135 | 136 | retCollected = retCollected.groupBy(t => t._1).toArray.map { case (group, traversable) => (group, traversable.map(t => t._2).reduce(cracker.reduceBlue)) } 137 | 138 | val active = retCollected.size 139 | // util.io.printStat(active, "active vertices") 140 | control = active == 0 141 | localStep += 2 142 | } 143 | 144 | val timeLocalEnd = System.currentTimeMillis() 145 | util.io.printStat(localStep, "localStep") 146 | util.io.printStat(timeLocalEnd - timeLocalStart, "localTime") 147 | } 148 | 149 | if (!crackerSkipPropagation) { 150 | 151 | var treeRDDPropagationTmp = treeRDD.get 152 | 153 | if (crackerUseUnionInsteadOfJoin && crackerCoalescePartition) { 154 | val timeStepStart = System.currentTimeMillis() 155 | treeRDDPropagationTmp = treeRDDPropagationTmp.coalesce(property.sparkPartition) 156 | val timeStepBlue = 
System.currentTimeMillis() 157 | util.io.printTime(timeStepStart, timeStepBlue, "coalescing") 158 | } 159 | 160 | stats.printMessageStats(step, treeRDDPropagationTmp) 161 | 162 | var treeRDDPropagation = treeRDDPropagationTmp.reduceByKey(cracker.reducePrepareDataForPropagation).map(t => (t._1, t._2.getMessagePropagation(t._1))).cache 163 | 164 | control = false 165 | while (!control) { 166 | val timeStepStart = System.currentTimeMillis() 167 | treeRDDPropagation = treeRDDPropagation.flatMap(item => cracker.mapPropagate(item)) 168 | 169 | stats.printMessageStats(step + 1, treeRDDPropagation) 170 | 171 | treeRDDPropagation = treeRDDPropagation.reduceByKey(cracker.reducePropagate).cache 172 | control = treeRDDPropagation.map(t => t._2.min != -1).reduce { case (a, b) => a && b } 173 | 174 | step = step + 1 175 | val timeStepBlue = System.currentTimeMillis() 176 | util.io.printTime(timeStepStart, timeStepBlue, "propagation") 177 | util.printTimeStep(step, timeStepBlue - timeStepStart) 178 | } 179 | 180 | val timeEnd = System.currentTimeMillis() 181 | 182 | if (property.printLargestCC) { 183 | printLargestCC(spark, property, treeRDDPropagation, parsedData) 184 | } 185 | 186 | if(property.printAll) { 187 | treeRDDPropagation.map(t => t._1+" "+t._2.min).saveAsTextFile(property.outputFile) 188 | } 189 | 190 | util.testEnded(treeRDDPropagation.map(t => (t._2.min, 1)).reduceByKey { case (a, b) => a + b }, 191 | step, 192 | timeBegin, 193 | timeEnd, 194 | timeSparkLoaded, 195 | timeDataLoaded, 196 | stats.reduceInputMessageNumberAccumulator.value, 197 | stats.reduceInputSizeAccumulator.value, 198 | getBitmaskStat(crackerUseUnionInsteadOfJoin, crackerCoalescePartition, crackerForceEvaluation), 199 | propertyLoad.get("optimizations", "111")) 200 | } else { 201 | val timeEnd = System.currentTimeMillis() 202 | val vertexNumber = fusedData.count 203 | 204 | util.testEnded(treeRDD.get.map(t => (1L, 1)).reduceByKey { case (a, b) => a + b }, 205 | step, 206 | timeBegin, 207 | timeEnd, 208 | timeSparkLoaded, 209 | timeDataLoaded, 210 | stats.reduceInputMessageNumberAccumulator.value + cracker.getMessageNumberForPropagation(step, vertexNumber), 211 | stats.reduceInputSizeAccumulator.value + cracker.getMessageSizeForPropagation(step, vertexNumber), 212 | getBitmaskStat(crackerUseUnionInsteadOfJoin, crackerCoalescePartition, crackerForceEvaluation), 213 | propertyLoad.get("optimizations", "111")) 214 | } 215 | } 216 | 217 | def bool2int(b: Boolean) = if (b) 1 else 0 218 | 219 | def printLargestCC(sc: SparkContext, property: CCPropertiesImmutable, tree: RDD[(Long, CrackerTreeMessagePropagation)], edgelist: RDD[(Long, Long)]) = { 220 | val maxCCId = tree.map(t => (t._2.min, 1)).reduceByKey { case (a, b) => a + b }.max()(new Ordering[Tuple2[Long, Int]]() { 221 | override def compare(x: (Long, Int), y: (Long, Int)): Int = 222 | Ordering[Int].compare(x._2, y._2) 223 | })._1 224 | 225 | val maxCCVertex = tree.filter(t => t._2.min == maxCCId).map(t => t._1) 226 | 227 | val maxCCVertexBroadcast = sc.broadcast(maxCCVertex.collect.toSet) 228 | val edgelistFiltered = edgelist.filter { case (s, d) => maxCCVertexBroadcast.value.contains(d) }.collect 229 | 230 | val writer = new FileWriter(property.filenameLargestCC, false) 231 | 232 | var edge = "" 233 | for (edge <- edgelistFiltered) { 234 | writer.write(edge._1 + " " + edge._2 + "\n") 235 | } 236 | 237 | writer.close() 238 | 239 | // edgelistFiltered.saveAsTextFile(property.filenameLargestCC) 240 | } 241 | 242 | def getBitmaskStat(crackerUseUnionInsteadOfJoin: Boolean, 243 
| crackerCoalescePartition: Boolean, 244 | crackerForceEvaluation: Boolean): String = { 245 | bool2int(crackerUseUnionInsteadOfJoin).toString + bool2int(crackerCoalescePartition).toString + bool2int(crackerForceEvaluation).toString 246 | } 247 | 248 | def getOptimizations(data: String): (Boolean, Boolean, Boolean) = { 249 | data match { 250 | case "100" => (true, false, false) 251 | case "010" => (false, true, false) 252 | case "001" => (false, false, true) 253 | case _ => (true, true, true) 254 | } 255 | } 256 | 257 | } -------------------------------------------------------------------------------- /src/sgc/SGCMain.scala: -------------------------------------------------------------------------------- 1 | package sgc 2 | 3 | import java.io.FileWriter 4 | import scala.collection.mutable.ListBuffer 5 | import org.apache.spark.SparkContext._ 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.rdd.RDD 8 | import util.CCUtil 9 | import util.CCProperties 10 | import util.CCProperties 11 | import org.apache.spark.Accumulator 12 | 13 | object SGCMain 14 | { 15 | def forestInitializationStart( node : (Long, Set[Long])) : Iterable[( Long, (Set[Long], Set[Long] ))] = // id, neighbour, nodes for which this node is the min 16 | { 17 | var outputList : ListBuffer[( Long, (Set[Long], Set[Long] ))] = new ListBuffer 18 | 19 | val min = Math.min(node._2.min, node._1) 20 | 21 | if(min != node._1) 22 | { 23 | outputList.prepend( ( min, (Set(), Set(node._1)) )) // tell the min that it is not a singleton 24 | } 25 | 26 | outputList.prepend( ( node._1, (node._2, Set()) )) 27 | 28 | outputList 29 | } 30 | 31 | def forestInitializationReduceStart(a : (Set[Long], Set[Long] ), b : (Set[Long], Set[Long] )) = 32 | { 33 | (a._1 ++ b._1, a._2 ++ b._2) 34 | } 35 | 36 | def forestInitializationEnd( node : (Long, (Set[Long], Set[Long]))) = // id, neighbour, p(v), child(v) 37 | { 38 | var outputList : ListBuffer[( Long, (Set[Long], Long, Set[Long] ))] = new ListBuffer 39 | 40 | if(node._2._2.isEmpty) // if singleton 41 | { 42 | val min = node._2._1.min 43 | 44 | outputList.prepend( ( node._1, (node._2._1, min, Set()) )) 45 | outputList.prepend( ( min, (Set(), -1, Set(node._1)) )) 46 | } else 47 | { 48 | val min = Math.min(node._2._1.min, node._1) 49 | 50 | outputList.prepend( ( node._1, (node._2._1, min, node._2._2) )) 51 | } 52 | 53 | outputList 54 | } 55 | 56 | def forestInitializationEndReduce(a : (Set[Long], Long, Set[Long] ), b : (Set[Long], Long, Set[Long] )) = 57 | { 58 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 ++ b._3) 59 | } 60 | 61 | def starDetectionRule1(node : ( Long, (Set[Long], Long, Set[Long] ))) : Iterable[( Long, (Set[Long], Long, Long, Set[Long] ))] = // neighbor, p(v), p(p(v)), child 62 | { 63 | var outputList : ListBuffer[( Long, (Set[Long], Long, Long, Set[Long] ))] = new ListBuffer 64 | 65 | val it = node._2._3.iterator // child iterator 66 | 67 | while(it.hasNext) 68 | { 69 | val next = it.next 70 | outputList.prepend( ( next, (Set(), -1, node._2._2, Set()) ) ) 71 | } 72 | 73 | if(node._2._2 == node._1) // if min = p(v) == p(p(v)) 74 | { 75 | outputList.prepend( ( node._1, (node._2._1, node._2._2, node._2._2, node._2._3) ) ) 76 | } else 77 | { 78 | outputList.prepend( ( node._1, (node._2._1, node._2._2, -1, node._2._3) ) ) 79 | } 80 | 81 | outputList 82 | } 83 | 84 | def starDetectionReduce1(a : (Set[Long], Long, Long, Set[Long] ), b : (Set[Long], Long, Long, Set[Long] )) = 85 | { 86 | (a._1 ++ b._1, Math.max(a._2, b._2), Math.max(a._3, b._3), a._4 ++ b._4) 87 | } 88 | 89 | def
starDetectionRule1End2Start(node : ( Long, (Set[Long], Long, Long, Set[Long] ))) : Iterable[( Long, (Set[Long], Long, Boolean, Set[Long] ))] = // neighbour, min, s(v), child 90 | { 91 | var outputList : ListBuffer[( Long, (Set[Long], Long, Boolean, Set[Long] ))] = new ListBuffer 92 | 93 | if(node._2._2 == node._2._3) // if p(v) == p(p(v)) 94 | { 95 | outputList.prepend( (node._1, (node._2._1, node._2._2, true, node._2._4)) ) 96 | } else 97 | { 98 | outputList.prepend((node._1, (node._2._1, node._2._2, false, node._2._4)) ) 99 | 100 | if(node._2._3 >= 0) 101 | { 102 | outputList.prepend( (node._2._3, (Set(), -1, false, Set())) ) 103 | // rule 2, the grandparent cannot be a star 104 | } 105 | } 106 | 107 | outputList 108 | } 109 | 110 | def starDetectionReduce2(a : (Set[Long], Long, Boolean, Set[Long] ), b : (Set[Long], Long, Boolean, Set[Long] )) = 111 | { 112 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 && b._3, a._4 ++ b._4) 113 | } 114 | 115 | def starDetectionRule3(node : ( Long, (Set[Long], Long, Boolean, Set[Long] ))) = 116 | { 117 | var outputList : ListBuffer[( Long, (Set[Long], Long, Boolean, Set[Long] ))] = new ListBuffer 118 | 119 | if(!node._2._3) 120 | { 121 | val it = node._2._4.iterator // child iterator 122 | 123 | while(it.hasNext) 124 | { 125 | val next = it.next 126 | outputList.prepend( ( next, (Set(), -1, false, Set()) ) ) 127 | } 128 | } 129 | 130 | outputList.prepend( ( node._1, (node._2._1, node._2._2, node._2._3, node._2._4) ) ) 131 | 132 | outputList 133 | } 134 | 135 | def starDetectionReduce3(a : (Set[Long], Long, Boolean, Set[Long]), b : (Set[Long], Long, Boolean, Set[Long] )) = 136 | { 137 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 && b._3, a._4 ++ b._4) 138 | } 139 | 140 | def conditionalStartHookingPre(node : ( Long, (Set[Long], Long, Boolean ))) = 141 | { 142 | var outputList : ListBuffer[( Long, (Long, Set[Long]) )] = new ListBuffer //min, otherMin 143 | 144 | val it = node._2._1.iterator // neighbour iterator 145 | 146 | while(it.hasNext) 147 | { 148 | val next = it.next 149 | 150 | outputList.prepend( ( next, (-1, Set(node._2._2)) ) ) 151 | } 152 | 153 | outputList.prepend( ( node._1, (node._2._2, Set()) ) ) 154 | 155 | outputList 156 | } 157 | 158 | def getNotMinus(a : Long, b : Long) = 159 | { 160 | if(a == -1) b 161 | else if(b == -1) a 162 | else Math.min(a, b) 163 | } 164 | 165 | def conditionalStarHookingPreReduce(a : (Long, Set[Long]), b : (Long, Set[Long])) = 166 | { 167 | (getNotMinus(a._1, b._1), a._2++b._2) 168 | } 169 | 170 | def conditionalStarHookingPreEnd(unconditional : Boolean, node : (Long, (Long, Set[Long]))) = 171 | { 172 | if(unconditional) 173 | { 174 | 175 | val a = node._2._2.filter(t => t != node._2._1) 176 | if(a.isEmpty) 177 | { 178 | (node._2._1, -1L) 179 | } else 180 | 181 | (node._2._1, a.min) 182 | } else 183 | { 184 | if(node._2._2.isEmpty) 185 | (node._2._1, -1L) 186 | else 187 | (node._2._1, node._2._2.min) 188 | } 189 | } 190 | 191 | def conditionalStarHookingPreEndReduce(a : Long, b : Long) = 192 | { 193 | if(a == -1) b 194 | else if(b == -1) a 195 | else 196 | Math.min(a, b) 197 | } 198 | 199 | def conditionalStartHooking(node : ( Long, ((Set[Long], Long, Boolean, Set[Long]), Option[Long] ))) = // neighbour, min, star, child, minReceivedChild 200 | { 201 | var outputList : ListBuffer[( Long, (Set[Long], Long, Set[Long]) )] = new ListBuffer //min, otherMin 202 | 203 | if(node._2._1._3 && node._1 == node._2._1._2 && node._2._2.isDefined && node._2._2.get != -1 && node._2._2.get < node._2._1._2) // if star and root 204 | {
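// this node is a star root and a strictly smaller candidate parent arrived from the pre-processing join: re-hook the root under that candidate and tell the candidate to record this node as a child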
205 | outputList.prepend((node._1, (node._2._1._1, node._2._2.get, node._2._1._4))) 206 | outputList.prepend((node._2._2.get, (Set(), -1, Set(node._1)))) 207 | } else 208 | { 209 | outputList.prepend((node._1, (node._2._1._1, node._2._1._2, node._2._1._4))) 210 | } 211 | 212 | outputList 213 | } 214 | 215 | def conditionalStartHookingReduce(a : (Set[Long], Long, Set[Long]) , b : (Set[Long], Long, Set[Long]) ) = 216 | { 217 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 ++ b._3) 218 | } 219 | 220 | def unconditionalStartHooking(node : ( Long, ((Set[Long], Long, Boolean, Set[Long]), Option[Long] ))) = // neighbour, min, star, minReceivedChild 221 | { 222 | var outputList : ListBuffer[( Long, (Set[Long], Long, Set[Long]) )] = new ListBuffer //min, otherMin 223 | 224 | if(node._2._1._3 && node._1 == node._2._1._2 && node._2._2.isDefined && node._2._2.get != -1) // if star and root 225 | { 226 | outputList.prepend((node._1, (node._2._1._1, node._2._2.get, node._2._1._4))) 227 | outputList.prepend((node._2._2.get, (Set(), -1, Set(node._1)))) 228 | } else 229 | { 230 | outputList.prepend((node._1, (node._2._1._1, node._2._1._2, node._2._1._4))) 231 | } 232 | 233 | outputList 234 | } 235 | 236 | def pointerJumping(node : ( Long, ((Set[Long], Long, Set[Long])))) = 237 | { 238 | var outputList : ListBuffer[( Long, (Set[Long], Long, Long) )] = new ListBuffer 239 | 240 | val it = node._2._3.iterator // child iterator 241 | 242 | while(it.hasNext) 243 | { 244 | val next = it.next 245 | 246 | outputList.prepend((next, (Set(), node._2._2, -1))) 247 | } 248 | 249 | if(node._1 == node._2._2) 250 | { 251 | outputList.prepend((node._1, (node._2._1, node._2._2, node._2._2))) 252 | } else 253 | { 254 | outputList.prepend((node._1, (node._2._1, -1, node._2._2))) 255 | } 256 | 257 | outputList 258 | } 259 | 260 | def pointerJumpingReduce(a : (Set[Long], Long, Long), b : (Set[Long], Long, Long)) = 261 | { 262 | (a._1 ++ b._1, Math.max(a._2, b._2), Math.max(a._3, b._3)) 263 | } 264 | 265 | def rebuildChild(node : ( Long, (Set[Long], Long, Long) )) = 266 | { 267 | var outputList : ListBuffer[( Long, (Set[Long], Long, Set[Long]) )] = new ListBuffer 268 | 269 | outputList.prepend((node._2._2, (Set(), -1, Set(node._1)))) 270 | outputList.prepend((node._1, (node._2._1, node._2._2, Set()))) 271 | 272 | outputList 273 | } 274 | 275 | def rebuildChildReduce(a : (Set[Long], Long, Set[Long]), b : (Set[Long], Long, Set[Long])) = 276 | { 277 | (a._1 ++ b._1, Math.max(a._2, b._2), a._3 ++ b._3) 278 | } 279 | 280 | def iteration( util : CCUtil, 281 | graph : RDD[(Long, (Set[Long], Long, Set[Long]))], 282 | printStat : Boolean, 283 | reduceInputSizeAccumulator : Accumulator[Long], 284 | reduceInputMessageNumberAccumulator : Accumulator[Long]) = 285 | { 286 | val timeStepStart = System.currentTimeMillis() 287 | 288 | val rule3 = graph .flatMap( item => starDetectionRule1( item ) ) 289 | .reduceByKey(starDetectionReduce1) 290 | .flatMap( item => starDetectionRule1End2Start( item ) ) 291 | .reduceByKey(starDetectionReduce2) 292 | .flatMap( item => starDetectionRule3( item ) ) 293 | .reduceByKey(starDetectionReduce3) 294 | .cache 295 | val preProcessing = rule3.map(t => (t._1, (t._2._1, t._2._2, t._2._3)))//.filter(node => node._1 != node._2._2) 296 | .flatMap( item => conditionalStartHookingPre( item ) ) 297 | .reduceByKey(conditionalStarHookingPreReduce) 298 | .map( item => conditionalStarHookingPreEnd(false, item ) ) 299 | .reduceByKey(conditionalStarHookingPreEndReduce) 300 | 301 | val starHooking =
rule3.leftOuterJoin(preProcessing).flatMap(item => conditionalStartHooking(item)) 302 | .reduceByKey(conditionalStartHookingReduce) 303 | .cache 304 | 305 | val ruleAfterStarHooking = starHooking .flatMap( item => starDetectionRule1( item ) ) 306 | .reduceByKey(starDetectionReduce1) 307 | .flatMap( item => starDetectionRule1End2Start( item ) ) 308 | .reduceByKey(starDetectionReduce2) 309 | .flatMap( item => starDetectionRule3( item ) ) 310 | .reduceByKey(starDetectionReduce3) 311 | .cache 312 | 313 | val preProcessing2 = ruleAfterStarHooking.map(t => (t._1, (t._2._1, t._2._2, t._2._3)))//.filter(node => node._1 != node._2._2) 314 | .flatMap( item => conditionalStartHookingPre( item ) ) 315 | .reduceByKey(conditionalStarHookingPreReduce) 316 | .map( item => conditionalStarHookingPreEnd(true, item ) ) 317 | .reduceByKey(conditionalStarHookingPreEndReduce) 318 | 319 | val unconditionalStarHooking = ruleAfterStarHooking.leftOuterJoin(preProcessing2).flatMap(item => unconditionalStartHooking(item)) 320 | .reduceByKey(conditionalStartHookingReduce).cache 321 | 322 | val pointerJumpingResult = unconditionalStarHooking.flatMap(item => pointerJumping(item)).reduceByKey(pointerJumpingReduce) 323 | 324 | 325 | val restart = pointerJumpingResult.flatMap(rebuildChild).reduceByKey(rebuildChildReduce).cache 326 | val termination = pointerJumpingResult.filter(t => t._2._2 != t._2._3).count 327 | 328 | 329 | val timeStepEnd = System.currentTimeMillis() 330 | util.io.printStat(termination, "termination") 331 | util.io.printStat(timeStepEnd - timeStepStart, "timeIteration") 332 | 333 | 334 | (restart, termination) 335 | } 336 | 337 | def main( args : Array[String] ) : Unit = 338 | { 339 | val timeBegin = System.currentTimeMillis() 340 | 341 | val property = new CCProperties("SGC", args(0)).load.getImmutable 342 | 343 | val util = new CCUtil(property) 344 | val spark = util.getSparkContext() 345 | 346 | val timeSparkLoaded = System.currentTimeMillis() 347 | val file = spark.textFile( property.dataset , property.sparkPartition) 348 | 349 | util.io.printFileStart(property.appName) 350 | 351 | val (parsedData, fusedData) = util.loadEdgeFromFile(file) 352 | 353 | var ret = fusedData.map( item => ( item._1, item._2.toSet) ) 354 | 355 | val timeDataLoaded = System.currentTimeMillis() 356 | 357 | var control = false; 358 | 359 | 360 | val reduceInputMessageNumberAccumulator = spark.accumulator(0L) 361 | val reduceInputSizeAccumulator = spark.accumulator(0L) 362 | 363 | val previous = ret 364 | var retMap = ret.flatMap( item => forestInitializationStart( item ) ) 365 | 366 | retMap = retMap.reduceByKey( forestInitializationReduceStart ).cache 367 | retMap.count 368 | 369 | var forestOut = retMap.flatMap( item => forestInitializationEnd( item ) ) 370 | .reduceByKey(forestInitializationEndReduce) 371 | 372 | var (graph, termination) = iteration(util, forestOut, property.printMessageStat, reduceInputMessageNumberAccumulator, reduceInputSizeAccumulator) 373 | 374 | var step = 2 + 14 375 | 376 | while(termination != 0) 377 | { 378 | val (graph2, termination2) = iteration(util, graph, property.printMessageStat, reduceInputMessageNumberAccumulator, reduceInputSizeAccumulator) 379 | graph = graph2 380 | termination = termination2 381 | step = step + 14 382 | } 383 | 384 | val timeEnd = System.currentTimeMillis() 385 | 386 | util.testEnded( graph.map(t => (t._2._2, 1)).reduceByKey{case (a,b) => a+b}, 387 | step, 388 | timeBegin, 389 | timeEnd, 390 | timeSparkLoaded, 391 | timeDataLoaded, 392 | 
reduceInputMessageNumberAccumulator.value, 393 | reduceInputSizeAccumulator.value) 394 | } 395 | 396 | } 397 | --------------------------------------------------------------------------------