├── Mizan-0.1bu1
    ├── Release
    │   └── src
    │   │   └── dataManager
    │   │       └── dataStructures
    │   │           └── data
    │   │               └── subdir.mk
    └── src
    │   ├── Mizan.h
    │   ├── algorithms
    │       ├── MST.h
    │       ├── SSSP.h
    │       ├── WCC.h
    │       ├── dimEst.h
    │       └── pageRank.h
    │   ├── communication
    │       └── dataStructures
    │       │   └── general.h
    │   ├── dataManager
    │       └── dataStructures
    │       │   └── data
    │       │       ├── mMSTEdgeValue.cpp
    │       │       ├── mMSTEdgeValue.h
    │       │       ├── mMSTVertexValue.cpp
    │       │       └── mMSTVertexValue.h
    │   ├── general.h
    │   ├── main.cpp
    │   └── tools
    │       └── argParser.h
├── README.md
├── benchmark
    ├── bench-all.sh
    ├── common
    │   ├── bench-finish.sh
    │   ├── bench-init.sh
    │   ├── cleanup-bench.sh
    │   ├── get-configs.sh
    │   ├── get-dirs.sh
    │   └── ssh-check.sh
    ├── datasets
    │   ├── Makefile
    │   ├── convert-adj.sh
    │   ├── convert-mst.sh
    │   ├── load-files.sh
    │   ├── load-splits.sh
    │   ├── mst-convert.cpp
    │   ├── snap-convert.cpp
    │   ├── snap-revert.cpp
    │   └── split-input.sh
    ├── giraph
    │   ├── benchall.sh
    │   ├── dimest.sh
    │   ├── kill-java-job.sh
    │   ├── mst.sh
    │   ├── pagerank.sh
    │   ├── prtolfinder.sh
    │   ├── recompile-giraph.sh
    │   ├── sssp.sh
    │   └── wcc.sh
    ├── gps
    │   ├── benchall.sh
    │   ├── debug-site.sh
    │   ├── dimest.sh
    │   ├── disable-dimest-fix.sh
    │   ├── enable-dimest-fix.sh
    │   ├── init.sh
    │   ├── mst.sh
    │   ├── pagerank.sh
    │   ├── recompile-gps.sh
    │   ├── sssp.sh
    │   ├── start-nodes.sh
    │   ├── stop-nodes.sh
    │   └── wcc.sh
    ├── graphlab
    │   ├── benchall.sh
    │   ├── dimest.sh
    │   ├── init.sh
    │   ├── pagerank.sh
    │   ├── recompile-graphlab.sh
    │   ├── sssp.sh
    │   └── wcc.sh
    ├── hadoop
    │   ├── init.sh
    │   └── restart-hadoop.sh
    ├── init-all.sh
    ├── local-init.sh
    ├── mizan
    │   ├── benchall.sh
    │   ├── dimest.sh
    │   ├── init.sh
    │   ├── mst.sh
    │   ├── pagerank.sh
    │   ├── premizan.sh
    │   ├── recompile-mizan.sh
    │   ├── sssp.sh
    │   └── wcc.sh
    ├── parsers
    │   ├── batch-parser.py
    │   └── log-checker.sh
    └── readme.txt
├── ec2
    └── uw-ec2.py
├── giraph-1.0.0
    ├── findbugs-exclude.xml
    ├── giraph-core
    │   └── src
    │   │   └── main
    │   │       └── java
    │   │           └── org
    │   │               └── apache
    │   │                   └── giraph
    │   │                       └── io
    │   │                           └── formats
    │   │                               ├── JsonLongLongLongLongVertexInputFormat.java
    │   │                               └── JsonLongLongNullLongVertexInputFormat.java
    └── giraph-examples
    │   └── src
    │       └── main
    │           └── java
    │               └── org
    │                   └── apache
    │                       └── giraph
    │                           └── examples
    │                               ├── ConnectedComponentsInputFormat.java
    │                               ├── ConnectedComponentsVertex.java
    │                               ├── DiameterEstimationInputFormat.java
    │                               ├── DiameterEstimationVertex.java
    │                               ├── JsonLongLongArrayInputFormat.java
    │                               ├── JsonLongMSTVertexInputFormat.java
    │                               ├── MinimumSpanningTreeInputFormat.java
    │                               ├── MinimumSpanningTreeVertex.java
    │                               ├── PageRankTolFinderVertex.java
    │                               ├── SimplePageRankInputFormat.java
    │                               ├── SimplePageRankVertex.java
    │                               └── SimpleShortestPathsInputFormat.java
├── gps-rev-110
    ├── local-master-scripts
    │   └── make_gps_node_runner_jar.sh
    └── src
    │   └── java
    │       └── gps
    │           ├── examples
    │               ├── dimest
    │               │   └── DiameterEstimationVertex.java
    │               ├── pagerank
    │               │   └── PageRankVertex.java
    │               ├── sssp
    │               │   ├── SSSPVertex.java
    │               │   └── SingleSourceAllVerticesShortestPathVertex.java
    │               └── wcc
    │               │   └── WeaklyConnectedComponentsVertex.java
    │           ├── messages
    │               └── storage
    │               │   ├── ArrayBackedIncomingMessageStorage.javaDIMEST
    │               │   └── ArrayBackedIncomingMessageStorage.javaORIGINAL
    │           ├── node
    │               ├── GPSNodeRunner.java
    │               └── worker
    │               │   └── dynamic
    │               │       ├── VertexWrapper.java
    │               │       └── greedy
    │               │           ├── BaseGreedyDynamicGPSWorkerImpl.java
    │               │           ├── onesync
    │               │               ├── OneSyncDynamicMessageSender.java
    │               │               └── OneSyncLaggingGreedyDynamicGPSWorker.java
    │               │           └── twosync
    │               │               └── TwoSyncGreedyDynamicGPSWorker.java
    │           └── writable
    │               └── LongArrayWritable.java
├── graphlab-2a063b3829
    ├── src
    │   └── graphlab
    │   │   └── graph
    │   │       ├── builtin_parsers.hpp
    │   │       └── distributed_graph.hpp
    └── toolkits
    │   └── graph_analytics
    │       ├── connected_component.cpp
    │       ├── pagerank.cpp
    │       └── sssp.cpp
└── results
    └── plots
        ├── all-plots.tex
        ├── constants.py
        ├── data_mem.py
        ├── data_mem_master.py
        ├── data_net.py
        ├── data_net_master.py
        ├── data_time.py
        ├── gen-all.sh
        ├── gen-data.py
        ├── plot-all.sh
        ├── plot-paper.sh
        ├── plot-with-cuts.py
        └── plot.py


/Mizan-0.1bu1/Release/src/dataManager/dataStructures/data/subdir.mk:
--------------------------------------------------------------------------------
 1 | ################################################################################
 2 | # Automatically-generated file. Do not edit!
 3 | ################################################################################
 4 | 
 5 | # Add inputs and outputs from these tool invocations to the build variables 
 6 | CPP_SRCS += \
 7 | ../src/dataManager/dataStructures/data/IdataType.cpp \
 8 | ../src/dataManager/dataStructures/data/mArrayIntTagNK.cpp \
 9 | ../src/dataManager/dataStructures/data/mCharArray.cpp \
10 | ../src/dataManager/dataStructures/data/mCharArrayNoCpy.cpp \
11 | ../src/dataManager/dataStructures/data/mDouble.cpp \
12 | ../src/dataManager/dataStructures/data/mDoubleArray.cpp \
13 | ../src/dataManager/dataStructures/data/mInt.cpp \
14 | ../src/dataManager/dataStructures/data/mIntCharArrayPair.cpp \
15 | ../src/dataManager/dataStructures/data/mIntTagDouble.cpp \
16 | ../src/dataManager/dataStructures/data/mLong.cpp \
17 | ../src/dataManager/dataStructures/data/mLongArray.cpp \
18 | ../src/dataManager/dataStructures/data/mMSTVertexValue.cpp \
19 | ../src/dataManager/dataStructures/data/mMSTEdgeValue.cpp 
20 | 
21 | OBJS += \
22 | ./src/dataManager/dataStructures/data/IdataType.o \
23 | ./src/dataManager/dataStructures/data/mArrayIntTagNK.o \
24 | ./src/dataManager/dataStructures/data/mCharArray.o \
25 | ./src/dataManager/dataStructures/data/mCharArrayNoCpy.o \
26 | ./src/dataManager/dataStructures/data/mDouble.o \
27 | ./src/dataManager/dataStructures/data/mDoubleArray.o \
28 | ./src/dataManager/dataStructures/data/mInt.o \
29 | ./src/dataManager/dataStructures/data/mIntCharArrayPair.o \
30 | ./src/dataManager/dataStructures/data/mIntTagDouble.o \
31 | ./src/dataManager/dataStructures/data/mLong.o \
32 | ./src/dataManager/dataStructures/data/mLongArray.o \
33 | ./src/dataManager/dataStructures/data/mMSTVertexValue.o \
34 | ./src/dataManager/dataStructures/data/mMSTEdgeValue.o 
35 | 
36 | CPP_DEPS += \
37 | ./src/dataManager/dataStructures/data/IdataType.d \
38 | ./src/dataManager/dataStructures/data/mArrayIntTagNK.d \
39 | ./src/dataManager/dataStructures/data/mCharArray.d \
40 | ./src/dataManager/dataStructures/data/mCharArrayNoCpy.d \
41 | ./src/dataManager/dataStructures/data/mDouble.d \
42 | ./src/dataManager/dataStructures/data/mDoubleArray.d \
43 | ./src/dataManager/dataStructures/data/mInt.d \
44 | ./src/dataManager/dataStructures/data/mIntCharArrayPair.d \
45 | ./src/dataManager/dataStructures/data/mIntTagDouble.d \
46 | ./src/dataManager/dataStructures/data/mLong.d \
47 | ./src/dataManager/dataStructures/data/mLongArray.d \
48 | ./src/dataManager/dataStructures/data/mMSTVertexValue.d \
49 | ./src/dataManager/dataStructures/data/mMSTEdgeValue.d 
50 | 
51 | 
52 | # Each subdirectory must supply rules for building sources it contributes
53 | src/dataManager/dataStructures/data/%.o: ../src/dataManager/dataStructures/data/%.cpp
54 | 	@echo 'Building file: $<'
55 | 	@echo 'Invoking: GCC C++ Compiler'
56 | 	mpic++ -I$(MPI_HOME)/include -I$(BOOST_ROOT)/include -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux -I$(HADOOP_HOME)/src/c++/libhdfs -O3 -w -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<"
57 | 	@echo 'Finished building: $<'
58 | 	@echo ' '
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/algorithms/SSSP.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * SSSP.h
  3 |  *
  4 |  * Created on: Nov 17 2013
  5 |  * Authors: Jack Jin, Jenny Wang, Young Han
  6 |  */
  7 | 
  8 | #ifndef SSSP_H_
  9 | #define SSSP_H_
 10 | 
 11 | #include "../IsuperStep.h"
 12 | #include "../Icombiner.h"
 13 | #include "../dataManager/dataStructures/data/mLong.h"
 14 | 
 15 | #define INF      mLong(LLONG_MAX)
 16 | 
 17 | // combiner that takes the minimum of all messages
 18 | class SSSPCombiner: public Icombiner<mLong, mLong, mLong, mLong> {
 19 | private:
 20 |   // NOTE: making this into a macro is dangerous!!
 21 |   mLong min(mLong a, mLong b) {
 22 |     return (a < b) ? a : b;
 23 |   }
 24 | 
 25 | public:
 26 |   void combineMessages(mLong dst, messageIterator<mLong> * messages,
 27 |                        messageManager<mLong, mLong, mLong, mLong> * mManager) {
 28 | 
 29 |     mLong minDist = INF;
 30 |     while (messages->hasNext()) {
 31 |       minDist = min(minDist, messages->getNext());
 32 |     }
 33 | 
 34 |     // send message if minDist is not INF
 35 |     if (minDist == INF) {
 36 |     } else {
 37 |       mManager->sendMessage(dst, minDist);
 38 |     }
 39 |   }
 40 | };
 41 | 
 42 | /*
 43 |  * Template types are <K, V1, M, A> where
 44 |  *   K:  ID class
 45 |  *   V1: vertex value class
 46 |  *   M:  message value class
 47 |  *   A:  aggregation class
 48 |  *
 49 |  * For SSSP, vertex and message values are both mLong
 50 |  */
 51 | class SSSP: public IsuperStep<mLong, mLong, mLong, mLong> {
 52 | private:
 53 |   mLong srcID;
 54 |   int maxSuperStep;
 55 | 
 56 |   bool isSrc(mLong id) {
 57 |     return (id == srcID);
 58 |   }
 59 | 
 60 |   mLong min(mLong a, mLong b) {
 61 |     return (a < b) ? a : b;
 62 |   }
 63 | 
 64 | public:
 65 |   /**
 66 |    * \param srcID The vertex ID of the source.
 67 |    * \param maxSS The maximum number of supersteps.
 68 |    */
 69 |   SSSP(mLong srcID, int maxSS) : srcID(srcID), maxSuperStep(maxSS) {}
 70 | 
 71 |   void initialize(userVertexObject<mLong, mLong, mLong, mLong> * data) {    
 72 |     // start all vertices with INF distance
 73 |     data->setVertexValue(INF);
 74 | 
 75 |     // TODO: HACK. Mizan does not read in edge values,
 76 |     // so let's assign everybody 1s
 77 |     for (int i = 0; i < data->getOutEdgeCount(); i++) {
 78 |       data->setOutEdgeValue( data->getOutEdgeID(i), mLong(1) );
 79 |     }
 80 |   }
 81 | 
 82 |   void compute(messageIterator<mLong> * messages,
 83 |                userVertexObject<mLong, mLong, mLong, mLong> * data,
 84 |                messageManager<mLong, mLong, mLong, mLong> * comm) {
 85 | 
 86 |     // can use getValue() to convert mLong to long long
 87 |     mLong currDist = data->getVertexValue();
 88 | 
 89 |     // potential new minimum distance
 90 |     mLong newDist = isSrc(data->getVertexID()) ? mLong(0) : INF;
 91 | 
 92 |     while (messages->hasNext()) {
 93 |       // cout << "receiving msg at ss=" << data->getCurrentSS() << " at id=" << data->getVertexID().getValue() << endl;
 94 |       newDist = min(newDist, messages->getNext());
 95 |     }
 96 | 
 97 |     // if new distance is smaller, notify out edges
 98 |     if (newDist < currDist) {
 99 |       data->setVertexValue(newDist);
100 | 
101 |       for (int i = 0; i < data->getOutEdgeCount(); i++) {
102 |         // cout << "sending msg at ss=" << data->getCurrentSS() << " to id=" << data->getOutEdgeID(i).getValue() << endl;
103 |         // (outEdgeValue is the value of an outgoing edge)
104 |         comm->sendMessage(data->getOutEdgeID(i),
105 |                           mLong(newDist.getValue() + data->getOutEdgeValue(i).getValue()));
106 |       }
107 |     }
108 | 
109 |     // always vote to halt
110 |     data->voteToHalt();
111 |   }
112 | };
113 | #endif /* SSSP_H_ */
114 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/algorithms/WCC.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * WCC.h
 3 |  *
 4 |  * Created on: Nov 17 2013
 5 |  * Authors: Jack Jin, Jenny Wang, Young Han
 6 |  */
 7 | 
 8 | #ifndef WCC_H_
 9 | #define WCC_H_
10 | 
11 | #include "../IsuperStep.h"
12 | #include "../Icombiner.h"
13 | #include "../dataManager/dataStructures/data/mLong.h"
14 | 
15 | #define INF      mLong(LLONG_MAX)
16 | 
17 | // combiner that takes the minimum of all messages
18 | class WCCCombiner: public Icombiner<mLong, mLong, mLong, mLong> {
19 | private:
20 |   // NOTE: making this into a macro is dangerous!!
21 |   mLong min(mLong a, mLong b) {
22 |     return (a < b) ? a : b;
23 |   }
24 | 
25 | public:
26 |   void combineMessages(mLong dst, messageIterator<mLong> * messages,
27 |                        messageManager<mLong, mLong, mLong, mLong> * mManager) {
28 | 
29 |     mLong minCompID = INF;
30 |     while (messages->hasNext()) {
31 |       minCompID = min(minCompID, messages->getNext());
32 |     }
33 | 
34 |     // send message if minCompID is not INF
35 |     if (minCompID == INF) {
36 |     } else {
37 |       mManager->sendMessage(dst, minCompID);
38 |     }
39 |   }
40 | };
41 | 
42 | /*
43 |  * Template types are <K, V1, M, A> where
44 |  *   K:  ID class
45 |  *   V1: vertex value class
46 |  *   M:  message value class
47 |  *   A:  aggregation class
48 |  *
49 |  * For WCC, vertex and message values are both mLong
50 |  */
51 | class WCC: public IsuperStep<mLong, mLong, mLong, mLong> {
52 | private:
53 |   int maxSuperStep;
54 | 
55 |   mLong min(mLong a, mLong b) {
56 |     return (a < b) ? a : b;
57 |   }
58 | 
59 | public:
60 |   /**
61 |    * \param srcID The vertex ID of the source.
62 |    * \param maxSS The maximum number of supersteps.
63 |    */
64 |   WCC(int maxSS) : maxSuperStep(maxSS) {}
65 | 
66 |   void initialize(userVertexObject<mLong, mLong, mLong, mLong> * data) {    
67 |     // all vertices start w/ component IDs being their own vertex ID
68 |     data->setVertexValue(data->getVertexID());
69 |   }
70 | 
71 |   void compute(messageIterator<mLong> * messages,
72 |                userVertexObject<mLong, mLong, mLong, mLong> * data,
73 |                messageManager<mLong, mLong, mLong, mLong> * comm) {
74 | 
75 |     // can use getValue() to convert mLong to long long
76 |     mLong currCompID = data->getVertexValue();
77 |     mLong newCompID = currCompID;
78 | 
79 |     while (messages->hasNext()) {
80 |       newCompID = min(newCompID, messages->getNext());
81 |     }
82 | 
83 |     // if new component ID is smaller, notify neighbours
84 |     // OR, if first supersteps, send message
85 |     if (newCompID < currCompID || data->getCurrentSS() == 1) {
86 |       data->setVertexValue(newCompID);
87 | 
88 |       for (int i = 0; i < data->getOutEdgeCount(); i++) {
89 |         // (outEdgeValue is the value of an outgoing edge)
90 |         comm->sendMessage(data->getOutEdgeID(i), newCompID);
91 |       }
92 |     }
93 | 
94 |     // always vote to halt
95 |     data->voteToHalt();
96 |   }
97 | };
98 | #endif /* WCC_H_ */
99 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/algorithms/dimEst.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * dimEst.h
  3 |  *
  4 |  * Created on: Sep 17, 2012
  5 |  * Author: refops
  6 |  *
  7 |  * Modified by Young
  8 |  */
  9 | 
 10 | #ifndef DIMEST_H_
 11 | #define DIMEST_H_
 12 | 
 13 | #include "../IsuperStep.h"
 14 | #include "../dataManager/dataStructures/data/mLongArray.h"
 15 | #include "../dataManager/dataStructures/data/mLong.h"
 16 | #include "../dataManager/dataStructures/data/mInt.h"
 17 | #include <boost/random/linear_congruential.hpp>
 18 | #include <boost/random/uniform_int.hpp>
 19 | #include <boost/random/uniform_real.hpp>
 20 | #include <boost/random/variate_generator.hpp>
 21 | #include <boost/generator_iterator.hpp>
 22 | #include <boost/random/mersenne_twister.hpp>
 23 | 
 24 | class dimEst: public IsuperStep<mLong, mLongArray, mLongArray, mLong> {
 25 | private:
 26 |   int maxSuperStep;
 27 |   int k;
 28 |   boost::mt19937 * generator;
 29 |   boost::uniform_real<> * uni_dist;
 30 |   boost::variate_generator<boost::mt19937&, boost::uniform_real<> > * uni;
 31 |   const static long long v62 = 62;
 32 |   const static long long v1 = 1;
 33 | 
 34 | public:
 35 |   dimEst(int inMaxSS) {
 36 |     k = 8;
 37 |     maxSuperStep = inMaxSS;
 38 | 
 39 |     generator = new boost::mt19937(std::time(0));
 40 |     uni_dist = new boost::uniform_real<>(0, 1);
 41 |     uni = new boost::variate_generator<boost::mt19937&,
 42 |                                        boost::uniform_real<> >(*generator, *uni_dist);
 43 |   }
 44 |   void initialize(
 45 |                   userVertexObject<mLong, mLongArray, mLongArray, mLong> * data) {
 46 |     mLong * value = new mLong[k];
 47 |     int finalBitCount = 63;
 48 |     long rndVal = 0;
 49 |     for (int j = 0; j < k; j++) {
 50 |       rndVal = create_random_bm(finalBitCount);
 51 |       value[j].setValue((v1 << (v62 - rndVal)));
 52 |     }
 53 |     mLongArray valueArray(k, value);
 54 |     data->setVertexValue(valueArray);
 55 |   }
 56 |   void compute(messageIterator<mLongArray> * messages,
 57 |                userVertexObject<mLong, mLongArray, mLongArray, mLong> * data,
 58 |                messageManager<mLong, mLongArray, mLongArray, mLong> * comm) {
 59 | 
 60 |     mLong * newBitMask = new mLong[k];
 61 |     //mLong * oldBitMask = data->getVertexValue().getArray();
 62 |      
 63 |     for (int i = 0; i < k; i++) {
 64 |       // TODO: need to do this, b/c of weird bug where oldBitMask[31] has wrong value
 65 |       newBitMask[i] = data->getVertexValue().getArray()[i]; //oldBitMask[i];
 66 |     }
 67 | 
 68 |     //std::cout << "value: " << newBitMask[31].getValue() << " " << oldBitMask[31].getValue() << " " << data->getVertexValue().getArray()[31].getValue() << std::endl;
 69 | 
 70 |     mLongArray tmpArray;
 71 |     mLong * tmpBitMask;
 72 | 
 73 |     bool isChanged = false;
 74 |     long long a;
 75 |     long long b;
 76 |     long long c;
 77 |     while (messages->hasNext()) {
 78 |       tmpArray = messages->getNext();
 79 |       tmpBitMask = tmpArray.getArray();
 80 |       for (int i = 0; i < k; i++) {
 81 |         a = newBitMask[i].getValue();
 82 |         b = tmpBitMask[i].getValue();
 83 |         c = a | b;
 84 |         newBitMask[i].setValue(c);
 85 | 
 86 |         // NOTE: unused for now---to terminate when all vertices converge,
 87 |         // use an aggregator to track # of vertices that have finished
 88 |         //isChanged = isChanged || (a != c);
 89 |       }
 90 |     }
 91 | 
 92 |     mLongArray outArray(k, newBitMask);
 93 | 
 94 |     // WARNING: we cannot terminate based on LOCAL steady state,
 95 |     // we need all vertices computing until the very end
 96 |     if (data->getCurrentSS() >= maxSuperStep) {
 97 |       data->voteToHalt();
 98 | 
 99 |     } else {
100 |       // use outedges to match Giraph and GPS
101 |       for (int i = 0; i < data->getOutEdgeCount(); i++) {
102 |         comm->sendMessage(data->getOutEdgeID(i), outArray);
103 |       }
104 | 
105 |       data->setVertexValue(outArray);
106 |     }
107 |   }
108 | 
109 |   //Src: Pegasus
110 |   int create_random_bm(int size_bitmask) {
111 |     int j;
112 | 
113 |     // cur_random is between 0 and 1.
114 |     double cur_random = uni->operator ()(); //rand.nextDouble(); //Math.random();
115 |     double threshold = 0;
116 |     for (j = 0; j < size_bitmask - 1; j++) {
117 |       threshold += pow(2.0, -1 * j - 1);
118 | 
119 |       if (cur_random < threshold) {
120 |         break;
121 |       }
122 |     }
123 | 
124 |     return j;
125 |   }
126 | };
127 | #endif /* DIMEST_H_ */
128 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/algorithms/pageRank.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * pageRank.h
 3 |  *
 4 |  *  Created on: Sep 18, 2012
 5 |  *      Author: refops
 6 |  */
 7 | 
 8 | #ifndef PAGERANK_H_
 9 | #define PAGERANK_H_
10 | 
11 | #include "../IsuperStep.h"
12 | #include "../Icombiner.h"
13 | #include "../dataManager/dataStructures/data/mLong.h"
14 | #include "../dataManager/dataStructures/data/mDouble.h"
15 | 
16 | class pageRankCombiner: public Icombiner<mLong, mDouble, mDouble, mLong> {
17 | 
18 |   void combineMessages(mLong dst, messageIterator<mDouble> * messages,
19 |       messageManager<mLong, mDouble, mDouble, mLong> * mManager) {
20 |     double newVal = 0;
21 |     while (messages->hasNext()) {
22 |       double tmp = messages->getNext().getValue();
23 |       newVal = newVal + tmp;
24 |     }
25 |     mDouble messageOut(newVal);
26 |     mManager->sendMessage(dst, messageOut);
27 |   }
28 | };
29 | 
30 | class pageRank: public IsuperStep<mLong, mDouble, mDouble, mLong> {
31 | private:
32 |   //int vertexTotal;
33 |   int maxSuperStep;
34 | 
35 | public:
36 | 
37 |   pageRank(int maxSS) {
38 |     //vertexTotal = 0;
39 |     maxSuperStep = maxSS;
40 |   }
41 |   void initialize(userVertexObject<mLong, mDouble, mDouble, mLong> * data) {
42 |     // NOTE: We follow GraphLab's alternative way of computing PageRank,
43 |     // which is to not divide by |V|. To get the probability value at
44 |     // each vertex, take its PageRank value and divide by |V|.
45 | 
46 |     //if (vertexTotal == 0) {
47 |     //  vertexTotal = data->getGlobalVertexCount();
48 |     //}
49 | 
50 |     // BUGFIX: this should not exist: vertexTotal++;
51 | 
52 |     data->setVertexValue(mDouble(1.0));
53 |     //data->setVertexValue(mDouble(1.0 / (double) vertexTotal));
54 |   }
55 |   void compute(messageIterator<mDouble> * messages,
56 |       userVertexObject<mLong, mDouble, mDouble, mLong> * data,
57 |       messageManager<mLong, mDouble, mDouble, mLong> * comm) {
58 | 
59 |     double currVal = data->getVertexValue().getValue();
60 |     double newVal = 0;
61 |     double c = 0.85;
62 | 
63 |     if (data->getCurrentSS() > 1) {
64 |       while (messages->hasNext()) {
65 |         double tmp = messages->getNext().getValue();
66 |         newVal = newVal + tmp;
67 |       }
68 |       newVal = newVal * c + (1.0 - c);
69 |       //newVal = newVal * c + (1.0 - c) / ((double) vertexTotal);
70 |       data->setVertexValue(mDouble(newVal));
71 |     } else {
72 |       newVal = currVal;
73 |     }
74 | 
75 |     // Termination condition based on max supersteps
76 |     if (data->getCurrentSS() <= maxSuperStep) {
77 |       mDouble outVal(newVal / ((double) data->getOutEdgeCount()));
78 |       for (int i = 0; i < data->getOutEdgeCount(); i++) {
79 |         comm->sendMessage(data->getOutEdgeID(i), outVal);
80 |       }
81 |     } else {
82 |       data->voteToHalt();
83 |     }
84 |   }
85 | };
86 | #endif /* PAGERANK_H_ */
87 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/communication/dataStructures/general.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * general.h
 3 |  *
 4 |  *  Created on: Apr 2, 2012
 5 |  *      Author: refops
 6 |  */
 7 | 
 8 | #ifndef GENERAL_H_
 9 | #define GENERAL_H_
10 | 
11 | #include <string.h>
12 | #include <iostream>
13 | #include <queue>
14 | #include "map"
15 | #include <stdio.h>
16 | #include <stdlib.h>
17 | 
18 | using namespace std;
19 | 
20 | #include "boost/thread/mutex.hpp"
21 | #include "boost/thread/exceptions.hpp"
22 | #include "mpi.h"
23 | 
24 | static int KB = 1024;
25 | static int MB = 1024 * 1024;
26 | static int data_msgsize = 4 * 1024;
27 | static int buffer_msgsize = 4 * KB; //* KB
28 | static queue<char*> SYS_cmdQueue;
29 | static queue<char*> DATA_cmdQueue;
30 | 
31 | enum messageStatus {
32 | 	m_success, m_fail,
33 | };
34 | 
35 | enum messageCode {
36 | 	DM, BCast, AllNB,
37 | };
38 | 
39 | enum communicationType {
40 | 	_pt2pt, _ring, _pt2ptb,
41 | };
42 | 
43 | enum msgHeader {
44 | 	_SYS, _DATA, _EXIT_PE,
45 | };
46 | enum SYS_CMDS {
47 | 	DHT_I, //dht_insert
48 | 	DHT_U, //dht_update
49 | 	DHT_A, //dht_ask
50 | 	DHT_R, //dht_response
51 | 	InitVertexCount,
52 | 	FinishInit,
53 | 	EndofSS,
54 | 	StartSS,
55 | 	Terminate,
56 | 	ENDMSG,
57 | 	VertexMigrate,
58 | 	SendSoftVertex,
59 | 	SendHardVertex,
60 | 	StealVertex,
61 | 	SendStolenVertex,
62 | 	StolenVertexResult,
63 | 	GraphMutation,
64 | 	LateStats,LateStatsTerminate,
65 | 	StealBarrier,
66 | 	Aggregator,
67 | 	MigrateBarrier,
68 | };
69 | 
70 | enum DATA_CMDS {
71 | 	SSdata, InNbrs, OutNbrs, ALLVTX, ENDDMSG,
72 | };
73 | enum SYS_CMDS_PRIORITY {
74 | 	NO_PRIORITY, AFTER_DATABUFFER_PRIORITY, INSTANT_PRIORITY
75 | };
76 | 
77 | enum block_type {
78 | 	INT, DOUBLE, CHAR, LONG_LONG,
79 | };
80 | 
81 | static const char* msgHeader_strings[] = { "_SYS", "_DATA", "_EXIT_PE" };
82 | static const char* DATA_CMDS_strings[] = { "SSdata", "InNbrs", "OutNbrs", "ALLVTX", "ENDDMSG" };
83 | static const char* SYS_CMDS_strings[] = { "DHT_I", "DHT_U", "DHT_A", "DHT_R",
84 | 		"InitVertexCount", "FinishInit", "EndofSS", "StartSS", "Terminate",
85 | 		"ENDMSG", "SSExecTime", "VertexMigrate", "SendSoftVertex",
86 | 		"SendHardVertex", "StealVertex", "SendStolenVertex",
87 | 		"StolenVertexResult" };
88 | 
89 | #endif /* GENERAL_H_ */
90 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/dataManager/dataStructures/data/mMSTEdgeValue.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * mMSTEdgeValue.cpp
  3 |  *
  4 |  * Created on: Dec 27, 2013
  5 |  * Author: Young Han
  6 |  */
  7 | 
  8 | #include "mMSTEdgeValue.h"
  9 | #include "mLong.h"
 10 | 
 11 | #define EDGE_VAL_LEN   3
 12 | 
 13 | // indices into value array
 14 | #define I_WEIGHT       0
 15 | #define I_SRC          1
 16 | #define I_DST          2
 17 | 
 18 | /** Constructors/Destructors **/
 19 | mMSTEdgeValue::mMSTEdgeValue() : weight(0), src(0), dst(0) {}
 20 | 
 21 | mMSTEdgeValue::mMSTEdgeValue(long long weight, long long src, long long dst)
 22 |   : weight(weight), src(src), dst(dst) {}
 23 | 
 24 | // copy constructor (same as implicit one)
 25 | mMSTEdgeValue::mMSTEdgeValue(const mMSTEdgeValue& obj) {
 26 |   weight = obj.weight;
 27 |   src = obj.src;
 28 |   dst = obj.dst;
 29 | }
 30 | 
 31 | mMSTEdgeValue::~mMSTEdgeValue() {}
 32 | 
 33 | int mMSTEdgeValue::byteSize() {
 34 |   return sizeof(long long)*EDGE_VAL_LEN;
 35 | }
 36 | 
 37 | std::string mMSTEdgeValue::toString() {
 38 |   // copied from mLongArray.cpp
 39 |   char outArray[31*EDGE_VAL_LEN];
 40 |   sprintf(outArray, "%lld:%lld:%lld:", weight, src, dst);
 41 |   std::string output(outArray);
 42 |   return output;
 43 | }
 44 | 
 45 | void mMSTEdgeValue::readFromCharArray(char * input) {
 46 |   // modified from mLongArray.cpp
 47 | 
 48 |   // should be constant, but whatever
 49 |   char delimiter = ':';
 50 |   mLong array[EDGE_VAL_LEN];
 51 | 
 52 |   int startPtr = 0;
 53 |   int endPtr = 0;
 54 |   for (int i = 0; i < EDGE_VAL_LEN; i++) {
 55 |     char tmpArray[30];
 56 |     while (input[endPtr] != delimiter) {
 57 |       endPtr++;
 58 |     }
 59 |     //12345:668512:999831
 60 |     strncpy(tmpArray, &input[startPtr], (endPtr - startPtr));
 61 |     tmpArray[endPtr - startPtr] = 0;
 62 |     array[i].readFromCharArray(tmpArray);
 63 |     endPtr++;
 64 |     startPtr = endPtr;
 65 |   }
 66 | 
 67 |   weight = array[I_WEIGHT].getValue();
 68 |   src = array[I_SRC].getValue();
 69 |   dst = array[I_DST].getValue();
 70 | }
 71 | 
 72 | char * mMSTEdgeValue::byteEncode(int &size) {
 73 |   // modified from mLongArray.cpp.. basic idea is the same
 74 |   char * output = (char *) calloc(byteSize(), sizeof(char));
 75 |   int j = 0;
 76 |   int tmpSize = 0;
 77 | 
 78 |   mLong array[EDGE_VAL_LEN];
 79 |   array[I_WEIGHT] = mLong(weight);
 80 |   array[I_SRC] = mLong(src);
 81 |   array[I_DST] = mLong(dst);
 82 | 
 83 |   for (int i = 0; i < EDGE_VAL_LEN; i++) {
 84 |     tmpSize = array[i].byteEncode2(&output[j + 1]);
 85 |     output[j] = ((char) tmpSize);
 86 |     j = j + tmpSize + 1;
 87 |   }
 88 |   size = j;
 89 |   return output;
 90 | }
 91 | 
 92 | int mMSTEdgeValue::byteEncode2(char * buffer) {
 93 |   // does not use byteEncode()... presumably to save on space?
 94 |   int j = 0;
 95 |   int tmpSize = 0;
 96 | 
 97 |   mLong array[EDGE_VAL_LEN];
 98 |   array[I_WEIGHT] = mLong(weight);
 99 |   array[I_SRC] = mLong(src);
100 |   array[I_DST] = mLong(dst);
101 | 
102 |   for (int i = 0; i < EDGE_VAL_LEN; i++) {
103 |     tmpSize = array[i].byteEncode2(&buffer[j + 1]);
104 |     buffer[j] = ((char) tmpSize);
105 |     j = j + tmpSize + 1;
106 |   }
107 |   return j;
108 | }
109 | 
110 | void mMSTEdgeValue::byteDecode(int size, char * input) {
111 |   // modified from mLongArray.cpp
112 |   int j = 0;
113 |   int objSize = 0;
114 |   mLong obj;
115 | 
116 |   mLong array[EDGE_VAL_LEN];
117 |   int i = 0;
118 | 
119 |   while (j < size) {
120 |     if (i >= EDGE_VAL_LEN) {
121 |       std::cout << "ERROR in mMSTEdgeValue byteDecode()!!";
122 |       break;
123 |     }
124 | 
125 |     objSize = ((int) input[j]);
126 |     array[i].byteDecode(objSize, &input[j + 1]);
127 |     j = j + objSize + 1;
128 |     i++;
129 |   }
130 | 
131 |   weight = array[I_WEIGHT].getValue();
132 |   src = array[I_SRC].getValue();
133 |   dst = array[I_DST].getValue();
134 | }
135 | 
136 | std::size_t mMSTEdgeValue::local_hash_value() const {
137 |   // just like mLongArray, do hash of first field.. which is long long
138 |   // copied from mLong.cpp
139 |   return weight;
140 | }
141 | 
142 | mMSTEdgeValue & mMSTEdgeValue::operator=(const mMSTEdgeValue& rhs) {
143 |   // yes, same as the implicit assignment...
144 |   weight = rhs.weight;
145 |   src = rhs.src;
146 |   dst = rhs.dst;
147 | }
148 | 
149 | /**
150 |  * Objects are == iff all fields are equal, unlike below.
151 |  */
152 | bool mMSTEdgeValue::operator==(const IdataType& rhs) const {
153 |   return (weight == ((mMSTEdgeValue&) rhs).weight &&
154 |           src == ((mMSTEdgeValue&) rhs).src &&
155 |           dst == ((mMSTEdgeValue&) rhs).dst);
156 | }
157 | 
158 | /**
159 |  * Comparison is based on the weight. If weights are same,
160 |  * then comparison is based on source vertex ID.
161 |  * The destination ID does not play a role.
162 |  * TODO
163 |  */
164 | bool mMSTEdgeValue::operator<(const IdataType& rhs) const {
165 |   return (weight < ((mMSTEdgeValue&) rhs).weight &&
166 |           src < ((mMSTEdgeValue&) rhs).src &&
167 |           dst < ((mMSTEdgeValue&) rhs).dst);
168 | 
169 | //  if (weight == rhs.weight) {
170 | //    return (src < rhs.src);
171 | //  }
172 | //
173 | //  return (weight < rhs.weight);
174 | }
175 | 
176 | bool mMSTEdgeValue::operator>(const IdataType &rhs) const {
177 |   return (weight > ((mMSTEdgeValue&) rhs).weight &&
178 |           src > ((mMSTEdgeValue&) rhs).src &&
179 |           dst > ((mMSTEdgeValue&) rhs).dst);
180 | }
181 | 
182 | bool mMSTEdgeValue::operator<=(const IdataType &rhs) const {
183 |   return (weight <= ((mMSTEdgeValue&) rhs).weight &&
184 |           src <= ((mMSTEdgeValue&) rhs).src &&
185 |           dst <= ((mMSTEdgeValue&) rhs).dst);
186 | }
187 | 
188 | bool mMSTEdgeValue::operator>=(const IdataType &rhs) const {
189 |   return (weight >= ((mMSTEdgeValue&) rhs).weight &&
190 |           src >= ((mMSTEdgeValue&) rhs).src &&
191 |           dst >= ((mMSTEdgeValue&) rhs).dst);
192 | }
193 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/dataManager/dataStructures/data/mMSTEdgeValue.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * mMSTEdgeValue.h
 3 |  *
 4 |  * Created on: Dec 27, 2013
 5 |  * Author: Young Han
 6 |  */
 7 | 
 8 | #ifndef MMSTEDGEVALUE_H_
 9 | #define MMSTEDGEVALUE_H_
10 | 
11 | #include "IdataType.h"
12 | 
13 | /**
14 |  * MST edge and vertex value representations
15 |  */
16 | class mMSTEdgeValue: public IdataType {
17 | private:
18 |   long long weight;
19 |   long long src;    // original source
20 |   long long dst;    // original destination
21 | public:
22 | 	mMSTEdgeValue();
23 |   mMSTEdgeValue(long long weight, long long src, long long dst);
24 | 	mMSTEdgeValue(const mMSTEdgeValue& obj);
25 | 	~mMSTEdgeValue();
26 | 	int byteSize();
27 | 	std::string toString();
28 | 	void readFromCharArray(char * input);
29 | 	char * byteEncode(int &size);
30 | 	int byteEncode2(char * buffer);
31 | 	void byteDecode(int size, char * input);
32 | 	std::size_t local_hash_value() const;
33 | 	mMSTEdgeValue & operator=(const mMSTEdgeValue& rhs);
34 | 	bool operator==(const IdataType& rhs) const;
35 | 	bool operator<(const IdataType& rhs) const;
36 | 	bool operator>(const IdataType &rhs) const;
37 | 	bool operator<=(const IdataType &rhs) const;
38 | 	bool operator>=(const IdataType &rhs) const;
39 | 
40 |   void cleanUp() {}
41 | 
42 | 	//Class specific methods
43 |   long long getWeight() { return weight; }
44 | 	long long getSrc() { return src; }
45 | 	long long getDst() { return dst;}
46 | 
47 |   // no setters---edge value should be immutable
48 | };
49 | #endif /* MMSTEDGEVALUE_H_ */
50 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/dataManager/dataStructures/data/mMSTVertexValue.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * mMSTVertexValue.h
 3 |  *
 4 |  * Created on: Dec 27, 2013
 5 |  * Author: Young Han
 6 |  */
 7 | 
 8 | #ifndef MMSTVERTEXVALUE_H_
 9 | #define MMSTVERTEXVALUE_H_
10 | 
11 | #include "IdataType.h"
12 | 
13 | /**
14 |  * Enum constants
15 |  */
16 | // phases of computation
17 | enum MSTPhase {
18 |   PHASE_1,   // find min-weight edge
19 |   PHASE_2A,  // question phase
20 |   PHASE_2B,  // Q /and/ A phase
21 |   PHASE_3A,  // send supervertex IDs
22 |   PHASE_3B,  // receive PHASE_3A messages
23 |   PHASE_4A,  // send edges to supervertex
24 |   PHASE_4B   // receive/merge edges
25 | };
26 | 
27 | // vertex types
28 | enum MSTVertexType {
29 |   TYPE_UNKNOWN,                 // initial state in Phase 2A
30 |   TYPE_SUPERVERTEX,             // supervertex
31 |   TYPE_POINTS_AT_SUPERVERTEX,   // child of supervertex
32 |   TYPE_POINTS_AT_SUBVERTEX      // child of child of supervertex
33 | };
34 | 
35 | /**
36 |  * MST edge and vertex value representations
37 |  */
38 | class mMSTVertexValue: public IdataType {
39 | private:
40 |   long long weight;
41 |   long long src;        // original source
42 |   long long dst;        // original destination
43 | 
44 |   MSTPhase phase;       // computation phase
45 |   MSTVertexType type;   // vertex type
46 |   long long pointer;    // vertex's (potential) supervertex
47 | 
48 | public:
49 |   mMSTVertexValue();
50 |   mMSTVertexValue(long long weight, long long src, long long dst,
51 |                   MSTPhase phase, MSTVertexType type, long long pointer);
52 | 
53 |   // NOTE: This is only for compatibility when used as a mMSTEdgeValue.
54 |   // Once Mizan supports separate edge value types, this should be deleted!
55 |   mMSTVertexValue(long long weight, long long src, long long dst);
56 | 
57 |   mMSTVertexValue(const mMSTVertexValue& obj);
58 |   ~mMSTVertexValue();
59 |   int byteSize();
60 |   std::string toString();
61 |   void readFromCharArray(char * input);
62 |   char * byteEncode(int &size);
63 |   int byteEncode2(char * buffer);
64 |   void byteDecode(int size, char * input);
65 |   std::size_t local_hash_value() const;
66 |   mMSTVertexValue & operator=(const mMSTVertexValue& rhs);
67 |   bool operator==(const IdataType& rhs) const;
68 |   bool operator<(const IdataType& rhs) const;
69 |   bool operator>(const IdataType &rhs) const;
70 |   bool operator<=(const IdataType &rhs) const;
71 |   bool operator>=(const IdataType &rhs) const;
72 | 
73 |   void cleanUp() {}
74 | 
75 |   //Class specific methods
76 |   long long getWeight() { return weight; }
77 |   long long getSrc() { return src; }
78 |   long long getDst() { return dst;}
79 | 
80 |   MSTPhase getPhase() { return phase; }
81 |   MSTVertexType getType() { return type; }
82 |   long long getPointer() { return pointer; }
83 | 
84 |   void setWeight(long long w) { weight = w; }
85 |   void setDst(long long d) { dst = d; }
86 |   void setSrc(long long s) { src = s; }
87 | 
88 |   void setPhase(MSTPhase ph) { phase = ph; }
89 |   void setType(MSTVertexType t) { type = t; }
90 |   void setPointer(long long p) { pointer = p; }
91 | };
92 | #endif /* MMSTVERTEXVALUE_H_ */
93 | 


--------------------------------------------------------------------------------
/Mizan-0.1bu1/src/general.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * general.h
 3 |  *
 4 |  *  Created on: Jun 13, 2012
 5 |  *      Author: refops
 6 |  */
 7 | 
 8 | #ifndef GENERALMIZAN_H_
 9 | #define GENERALMIZAN_H_
10 | #include <map>
11 | 
12 | #include "IAggregator.h"
13 | #include "computation/systemWideInfo.h"
14 | #include "communication/sysComm.h"
15 | #include "boost/thread.hpp"
16 | #include "dataManager/dataStructures/general.h"
17 | 
18 | template<class K, class V1, class M, class A> class sysComm;
19 | template<class K, class V1, class M, class A> class userComm;
20 | 
21 | template<class K, class V1, class M, class A>
22 | struct systemDataPointer {
23 | 	std::map<char *, IAggregator<A> *> aggContainer;
24 | 	boost::mutex aggContainerLock;
25 | 	systemWideInfo<K> sysInfo;
26 | 	sysComm<K, V1, M, A> * sc;
27 | 	userComm<K, V1, M, A> * uc;
28 | };
29 | 
30 | struct MizanArgs {
31 | 	int algorithm;
32 | 	int clusterSize;
33 | 	std::string graphName;
34 | 	fileSystem fs;
35 | 	distType partition;
36 | 	std::string hdfsUserName;
37 | 	migrationMode migration;
38 | 	communicationType communication;
39 | 	int superSteps;
40 |   // NOTE: this is "hacked" in... a better way is for the
41 |   // relevant algorithms to parse a portion of the arguments
42 |   long srcID;
43 | };
44 | 
45 | #endif /* GENERALMIZAN_H_ */
46 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | graph-processing
2 | ================
3 | 
4 | A comparison of graph processing systems. Please see the [wiki](https://github.com/xvz/graph-processing/wiki/)!
5 | 


--------------------------------------------------------------------------------
/benchmark/bench-all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Runs all the benchmarks.
 4 | #
 5 | # The batch-benchmarking scripts are quite primitive, simply because
 6 | # when things fail it's usually easier to intervene manually.
 7 | #
 8 | # We recommend running this in a "screen" so a terminated ssh
 9 | # connection doesn't kill it.
10 | #
11 | # Use "screen" to start a screen and run "./bench-all.sh" within it.
12 | # Detach from the screen at any time with C-a d (Ctrl-a d).
13 | # Reattach to the screen anywhere with "screen -R". This can be done
14 | # after a detach or when ssh is inadvertently killed.
15 | 
16 | cd "$(dirname "${BASH_SOURCE[0]}")"
17 | source ./common/get-hosts.sh
18 | source ./common/get-dirs.sh
19 | 
20 | # start (or restart) Hadoop
21 | ./hadoop/restart-hadoop.sh
22 | hadoop dfsadmin -safemode wait > /dev/null
23 | 
24 | echo "Running Giraph experiments..."
25 | ./giraph/benchall.sh ${NUM_MACHINES} 5
26 | 
27 | echo "Running GPS experiments..."
28 | ./gps/benchall.sh ${NUM_MACHINES} 5
29 | 
30 | echo "Running GraphLab experiments..."
31 | ./graphlab/benchall.sh ${NUM_MACHINES} 5
32 | 
33 | echo "Running Mizan experiments..."
34 | ./mizan/benchall.sh ${NUM_MACHINES} 5


--------------------------------------------------------------------------------
/benchmark/common/bench-finish.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Finish data logging/collection at the master and all worker machines.
 4 | 
 5 | if [ $# -ne 1 ]; then
 6 |     echo "usage: $0 log-name-prefix"
 7 |     exit -1
 8 | fi
 9 | 
10 | source "$(dirname "${BASH_SOURCE[0]}")"/get-hosts.sh
11 | 
12 | logname=$1
13 | dir=$PWD
14 | 
15 | for ((i = 0; i <= ${NUM_MACHINES}; i++)); do
16 |     nbtfile=${logname}_${i}_nbt.txt   # network bytes total
17 | 
18 |     # special case for master, to make it work for local testing too
19 |     if [ $i -eq  0 ]; then
20 |         name=${HOSTNAME}
21 |     else
22 |         name=${CLUSTER_NAME}${i}
23 |     fi
24 | 
25 |     # 1. Change to the same directory as master.
26 |     # 2. Append final network usage.
27 |     # 3. Kill sar and free to stop tracking.
28 |     #
29 |     # NOTE: - could use `jobs -p` for kill, but difficult b/c we're ssh-ing
30 |     #       - must escape $ for things that should be evaluated remotely
31 |     ssh ${name} "cd \"$dir\"; cat /proc/net/dev >> ./logs/${nbtfile} & kill \$(pgrep sar) & kill \$(pgrep free)" &
32 | done
33 | wait
34 | 
35 | # get worker machines' files in parallel, with compression to speed things up
36 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
37 |     rsync -az ${CLUSTER_NAME}${i}:"$dir"/logs/${logname}_${i}_*.txt ./logs/ &
38 | done
39 | wait


--------------------------------------------------------------------------------
/benchmark/common/bench-init.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Initiate data logging/collection at the master and all worker machines.
 4 | 
 5 | if [ $# -ne 1 ]; then
 6 |     echo "usage: $0 log-name-prefix"
 7 |     exit -1
 8 | fi
 9 | 
10 | source "$(dirname "${BASH_SOURCE[0]}")"/get-hosts.sh
11 | 
12 | logname=$1
13 | dir=$PWD
14 | 
15 | for ((i = 0; i <= ${NUM_MACHINES}; i++)); do
16 |     cpufile=${logname}_${i}_cpu.txt   # cpu usage
17 |     netfile=${logname}_${i}_net.txt   # network usage
18 |     memfile=${logname}_${i}_mem.txt   # memory usage
19 |     nbtfile=${logname}_${i}_nbt.txt   # network bytes total
20 | 
21 |     # special case for master, to make it work for local testing too
22 |     if [ $i -eq  0 ]; then
23 |         name=${HOSTNAME}
24 |     else
25 |         name=${CLUSTER_NAME}${i}
26 |     fi
27 | 
28 |     # 1. Change to the same directory as master.
29 |     # 2. Start sysstat for cpu and network usage, and free for memory usage (1s intervals).
30 |     # 3. Print initial network bytes.
31 |     #
32 |     # NOTE: - & is like variant of ;, so don't need both
33 |     #       - grep needs stdbuf correction, otherwise nothing shows up
34 |     ssh ${name} "cd \"$dir\"; sar 1 > ./logs/${cpufile} & free -s 1 | stdbuf -o0 grep + > ./logs/${memfile} & sar -n DEV 1 | stdbuf -o0 grep 'lo\|eth0' > ./logs/${netfile} & cat /proc/net/dev > ./logs/${nbtfile}" &
35 | done
36 | wait


--------------------------------------------------------------------------------
/benchmark/common/cleanup-bench.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Cleans up rogue stat programs created by bench-init,
 4 | # in the event that bench-finish was unable to run.
 5 | #
 6 | # Alternatively, one can run bench-finish by passing in
 7 | # the correct log name prefix to clean things up and get
 8 | # the worker machines' (incomplete) logs.
 9 | 
10 | source "$(dirname "${BASH_SOURCE[0]}")"/get-hosts.sh
11 | 
12 | for ((i = 0; i <= ${NUM_MACHINES}; i++)); do
13 |     # special case for master, to make it work for local testing too
14 |     if [ $i -eq  0 ]; then
15 |         name=${HOSTNAME}
16 |     else
17 |         name=${CLUSTER_NAME}${i}
18 |     fi
19 | 
20 |     ssh ${name} "kill \$(pgrep sar) & kill \$(pgrep free)" &
21 | done
22 | wait


--------------------------------------------------------------------------------
/benchmark/common/get-configs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Specifies system-specific configuration parameters
 4 | # used by the various scripts.
 5 | #
 6 | # NOTE: include/source using "$(dirname "${BASH_SOURCE[0]}")"
 7 | # as a part of the directory.
 8 | 
 9 | # maximum JVM heap size for Giraph (per machine)
10 | # NOTE: to put changes into effect without re-initializing everything (i.e., ../init-all.sh),
11 | #       run ../hadoop/init.sh; ../hadoop/restart-hadoop.sh 1
12 | GIRAPH_XMX=14500M
13 | 
14 | # maximum JVM heap size for GPS (per WORKER, not machine)
15 | GPS_WORKER_XMX=7250M
16 | # max JVM heap size for GPS master
17 | GPS_MASTER_XMX=4096M
18 | 
19 | 
20 | # number of compute/input/output threads per machine
21 | GIRAPH_THREADS=2
22 | 
23 | # number of workers per machine (WPM)
24 | GPS_WPM=2
25 | MIZAN_WPM=2   # NOTE: re-run premizan if this is changed


--------------------------------------------------------------------------------
/benchmark/common/get-dirs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Specifies the absolute paths of the systems and other things.
 4 | #
 5 | # If path has spaces, escape the spaces AND quote it. For example,
 6 | # SOME_DIR="/home/me/not\ a\ great\ folder\ name/".
 7 | #
 8 | # NOTE: if the including script will be included in other
 9 | # scripts, use "$(dirname "${BASH_SOURCE[0]}")" as a part
10 | # of the directory.
11 | 
12 | DIR_PREFIX=/home/ubuntu
13 | #DIR_PREFIX=/home/young/cs848   # for testing on a single machine
14 | 
15 | # location of datasets/input graphs
16 | DATASET_DIR="$DIR_PREFIX"/datasets/
17 | 
18 | # $JAVA_DIR/bin/java should be the Java binary that is
19 | # used by all systems (incl. Hadoop) that need Java
20 | JAVA_DIR="$DIR_PREFIX"/jdk1.6.0_30/
21 | 
22 | # HADOOP_DATA is where HDFS files and Hadoop logs are stored
23 | HADOOP_DIR="$DIR_PREFIX"/hadoop-1.0.4/
24 | HADOOP_DATA_DIR="$DIR_PREFIX"/hadoop_data/
25 | 
26 | GIRAPH_DIR="$DIR_PREFIX"/giraph-1.0.0/
27 | 
28 | # These must match "GPS_DIR" and "GPS_LOG_DIRECTORY" of $GPS_DIR/conf/gps-env.sh
29 | GPS_DIR="$DIR_PREFIX"/gps-rev-110/
30 | GPS_LOG_DIR="$DIR_PREFIX"/var/tmp/
31 | 
32 | GRAPHLAB_DIR="$DIR_PREFIX"/graphlab-2a063b3829/
33 | MIZAN_DIR="$DIR_PREFIX"/Mizan-0.1bu1/


--------------------------------------------------------------------------------
/benchmark/common/ssh-check.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Simple script to check if worker machines can be ssh'd to.
 4 | 
 5 | cd "$(dirname "${BASH_SOURCE[0]}")"
 6 | source ./get-hosts.sh
 7 | source ./get-dirs.sh
 8 | 
 9 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
10 |     nc -v -w 1 ${CLUSTER_NAME}${i} -z 22
11 | done
12 | 


--------------------------------------------------------------------------------
/benchmark/datasets/Makefile:
--------------------------------------------------------------------------------
 1 | all: snap-convert snap-revert mst-convert
 2 | 
 3 | clean:
 4 | 	rm -f snap-convert
 5 | 	rm -f snap-revert
 6 | 	rm -f mst-convert
 7 | 
 8 | snap-convert: snap-convert.cpp
 9 | 	g++ -Wall snap-convert.cpp -o snap-convert
10 | 
11 | snap-revert: snap-revert.cpp
12 | 	g++ -Wall snap-revert.cpp -o snap-revert
13 | 
14 | mst-convert: mst-convert.cpp
15 | 	g++ -Wall mst-convert.cpp -o mst-convert


--------------------------------------------------------------------------------
/benchmark/datasets/convert-adj.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # second arg is 1 if graph is for MST (SNAP format w/ edge weights)
 4 | # and 0 otherwise (regular SNAP format)
 5 | if [ $# -ne 2 ]; then
 6 |     echo "usage: $0 input-graph do-mst?"
 7 |     echo ""
 8 |     echo "do-mst: 0 converts regular SNAP format (src dst)"
 9 |     echo "        1 converts SNAP with edge weights (src dst weight)"
10 |     exit -1
11 | fi
12 | 
13 | scriptdir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
14 | graph=$(echo "$1" | sed 's/.txt$//g')
15 | domst=$2
16 | 
17 | if [[ ! -f "${graph}.txt" ]]; then
18 |     echo "${graph}.txt does not exist."
19 |     exit -1
20 | fi
21 | 
22 | if [[ -f "${graph}-adj.txt" ]]; then
23 |     echo "${graph}-adj.txt already exists. Delete it first."
24 |     exit -1
25 | fi
26 | 
27 | # convert graph to adjacency format
28 | echo "Converting ${graph}.txt to adjacency format..."
29 | if [[ ${domst} -eq 1 ]]; then
30 |     "${scriptdir}"/snap-convert "${graph}.txt" "${graph}-adj.txt" 2 2
31 | else
32 |     "${scriptdir}"/snap-convert "${graph}.txt" "${graph}-adj.txt" 1 1
33 | fi
34 | 
35 | echo "Done!"


--------------------------------------------------------------------------------
/benchmark/datasets/convert-mst.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Converts a SNAP graph input into an undirected graph
 4 | # with unique edge weights. Output is in SNAP format,
 5 | # with an additional column for weights.
 6 | #
 7 | # Processor and memory arguments below are used for sort.
 8 | procs=$(nproc)
 9 | mem=4G
10 | 
11 | if [ $# -ne 1 ]; then
12 |     echo "usage: $0 input-graph"
13 |     exit -1
14 | fi
15 | 
16 | scriptdir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
17 | graph=$(echo "$1" | sed 's/.txt$//g')
18 | 
19 | if [[ ! -f "${graph}.txt" ]]; then
20 |     echo "${graph}.txt does not exist."
21 |     exit -1
22 | fi
23 | 
24 | if [[ -f "${graph}-mst.txt" ]]; then
25 |     echo "${graph}-mst.txt already exists. Delete it first."
26 |     exit -1
27 | fi
28 | 
29 | # sort the input, if it's not already sorted
30 | unsorted=$(sort -nk1 -nk2 --parallel=${procs} -S ${mem} -c "${graph}.txt" |& wc -l)
31 | 
32 | if [[ ${unsorted} -eq 0 ]]; then
33 |     echo "Input already sorted."
34 |     sortedgraph="$graph"
35 | else
36 |     echo "Sorting input..."
37 |     sort -nk1 -nk2 --parallel=${procs} -S ${mem} "${graph}.txt" > "${graph}-sorted.txt"
38 |     sortedgraph="${graph}-sorted"
39 | 
40 |     echo "Delete unsorted input?"
41 |     rm -i "${graph}.txt"
42 | fi
43 | 
44 | echo "Converting ${graph}.txt to MST format..."
45 | 
46 | "${scriptdir}"/mst-convert "${sortedgraph}.txt" "${graph}-mst-unsorted.txt"
47 | 
48 | # sort the output
49 | echo "Sorting output..."
50 | sort -nk1 -nk2 --parallel=${procs} -S ${mem} "${graph}-mst-unsorted.txt" > "${graph}-mst.txt"
51 | 
52 | rm -f "${graph}-mst-unsorted.txt"
53 | 
54 | echo "Done!"


--------------------------------------------------------------------------------
/benchmark/datasets/load-files.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Loads the input data, based on the cluster size.
 4 | #
 5 | # The size can be specified as an argument. Otherwise,
 6 | # it will be obtained based on ../common/get-hosts.sh.
 7 | 
 8 | commondir=$(dirname "${BASH_SOURCE[0]}")/../common
 9 | source "$commondir"/get-dirs.sh
10 | source "$commondir"/get-hosts.sh
11 | 
12 | if [ $# -eq 0 ]; then
13 |     case ${NUM_MACHINES} in
14 |         4) size=1;;
15 |         8) size=1;;
16 |         16) size=2;;
17 |         32) size=2;;
18 |         64) size=3;;
19 |         128) size=3;;
20 |         *) echo "Invalid number of machines.";
21 |            echo "usage: $0 size";
22 |            echo "";
23 |            echo "size: 1 for amazon, google, patents";
24 |            echo "      2 for livejournal, orkut, arabic, twitter";
25 |            echo "      3 for livejournal, orkut, arabic, twitter, uk0705";
26 |            exit -1;;
27 |     esac
28 | else
29 |     size=$1
30 | fi
31 | 
32 | cd "$DATASET_DIR"
33 | 
34 | hadoop dfsadmin -safemode wait > /dev/null
35 | hadoop dfs -mkdir ./input || true    # no problem if it already exists
36 | 
37 | case ${size} in
38 |     1)  echo "Uploading amazon*.txt...";  hadoop dfs -put amazon*.txt ./input/;
39 |         echo "Uploading google*.txt...";  hadoop dfs -put google*.txt ./input/;
40 |         echo "Uploading patents*.txt..."; hadoop dfs -put patents*.txt ./input/;;
41 |     2)  echo "Uploading livejournal*.txt..."; hadoop dfs -put livejournal*.txt ./input/;
42 |         echo "Uploading orkut*.txt...";       hadoop dfs -put orkut*.txt ./input/;
43 |         echo "Uploading arabic*.txt...";      hadoop dfs -put arabic*.txt ./input/;
44 |         echo "Uploading twitter-adj.txt...";  hadoop dfs -put twitter-adj.txt ./input/;;
45 |     3)  echo "Uploading livejournal*.txt..."; hadoop dfs -put livejournal*.txt ./input/;
46 |         echo "Uploading orkut*.txt...";       hadoop dfs -put orkut*.txt ./input/;
47 |         echo "Uploading arabic*.txt...";      hadoop dfs -put arabic*.txt ./input/;
48 |         echo "Uploading twitter*.txt...";     hadoop dfs -put twitter*.txt ./input/;
49 |         echo "Uploading uk0705-adj.txt...";   hadoop dfs -put uk0705-adj.txt ./input/;
50 |         echo "Uploading uk0705-mst-adj.txt..."; hadoop dfs -put uk0705-mst-adj.txt ./input/;;
51 |     *) echo "Invalid size"; exit -1;;
52 | esac
53 | 
54 | echo "Done."


--------------------------------------------------------------------------------
/benchmark/datasets/load-splits.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Loads split input data, based on the cluster size.
 4 | #
 5 | # The size can be specified as an argument. Otherwise,
 6 | # it will be obtained based on ../common/get-hosts.sh.
 7 | 
 8 | commondir=$(dirname "${BASH_SOURCE[0]}")/../common
 9 | scriptdir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
10 | source "$commondir"/get-dirs.sh
11 | source "$commondir"/get-hosts.sh
12 | 
13 | if [ $# -eq 0 ]; then
14 |     case ${NUM_MACHINES} in
15 |         4) size=1;;
16 |         8) size=1;;
17 |         16) size=2;;
18 |         32) size=2;;
19 |         64) size=3;;
20 |         128) size=3;;
21 |         *) echo "Invalid number of machines.";
22 |            echo "usage: $0 size";
23 |            echo "";
24 |            echo "size: 1 for amazon, google, patents";
25 |            echo "      2 for livejournal, orkut, arabic, twitter";
26 |            echo "      3 for livejournal, orkut, arabic, twitter, uk0705";
27 |            exit -1;;
28 |     esac
29 | else
30 |     size=$1
31 | fi
32 | 
33 | cd "$DATASET_DIR"
34 | 
35 | hadoop dfsadmin -safemode wait > /dev/null
36 | hadoop dfs -mkdir ./input || true    # no problem if it already exists
37 | 
38 | case ${size} in
39 |     1)  "${scriptdir}"/split-input.sh amazon-adj.txt ${NUM_MACHINES};
40 |         "${scriptdir}"/split-input.sh google-adj.txt ${NUM_MACHINES};
41 |         "${scriptdir}"/split-input.sh patents-adj.txt ${NUM_MACHINES};;
42 |     2)  "${scriptdir}"/split-input.sh livejournal-adj.txt ${NUM_MACHINES};
43 |         "${scriptdir}"/split-input.sh orkut-adj.txt ${NUM_MACHINES};
44 |         "${scriptdir}"/split-input.sh arabic-adj.txt ${NUM_MACHINES};
45 |         "${scriptdir}"/split-input.sh twitter-adj.txt ${NUM_MACHINES};;
46 |     3)  "${scriptdir}"/split-input.sh livejournal-adj.txt ${NUM_MACHINES};
47 |         "${scriptdir}"/split-input.sh orkut-adj.txt ${NUM_MACHINES};
48 |         "${scriptdir}"/split-input.sh arabic-adj.txt ${NUM_MACHINES};
49 |         "${scriptdir}"/split-input.sh twitter-adj.txt ${NUM_MACHINES};
50 |         "${scriptdir}"/split-input.sh uk0705-adj.txt ${NUM_MACHINES};;
51 |     *) echo "Invalid size"; exit -1;;
52 | esac
53 | 
54 | case ${size} in
55 |     1)  echo "Uploading amazon-adj-split/...";  hadoop dfs -put amazon-adj-split/ ./input/;
56 |         echo "Uploading google-adj-split/...";  hadoop dfs -put google-adj-split/ ./input/;
57 |         echo "Uploading patents-adj-split/..."; hadoop dfs -put patents-adj-split/ ./input/;;
58 |     2)  echo "Uploading livejournal-adj-split/..."; hadoop dfs -put livejournal-adj-split/ ./input/;
59 |         echo "Uploading orkut-adj-split/...";       hadoop dfs -put orkut-adj-split/ ./input/;
60 |         echo "Uploading arabic-adj-split/...";      hadoop dfs -put arabic-adj-split/ ./input/;
61 |         echo "Uploading twitter-adj-split/...";     hadoop dfs -put twitter-adj-split/ ./input/;;
62 |     3)  echo "Uploading livejournal-adj-split/..."; hadoop dfs -put livejournal-adj-split/ ./input/;
63 |         echo "Uploading orkut-adj-split/...";       hadoop dfs -put orkut-adj-split/ ./input/;
64 |         echo "Uploading arabic-adj-split/...";      hadoop dfs -put arabic-adj-split/ ./input/;
65 |         echo "Uploading twitter-adj-split/...";     hadoop dfs -put twitter-adj-split/ ./input/;
66 |         echo "Uploading uk0705-adj-split/...";      hadoop dfs -put uk0705-adj-split/ ./input/;;
67 |     *) echo "Invalid size"; exit -1;;
68 | esac
69 | 
70 | echo "Done."


--------------------------------------------------------------------------------
/benchmark/datasets/snap-convert.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <fstream>
  3 | #include <iostream>
  4 | #include <string>
  5 | #include <cerrno>
  6 | 
  7 | #define F_IN_SNAP        1
  8 | #define F_IN_SNAPWEIGHT  2
  9 | #define F_IN_GEN_UNITY   3
 10 | #define F_IN_GEN_SEQ     4
 11 | 
 12 | #define F_TO_ADJ         1
 13 | #define F_TO_ADJWEIGHT   2
 14 | #define F_TO_JSON        3
 15 | 
 16 | static long counter = 1;
 17 | 
 18 | static void usage(char **argv) {
 19 |   std::cout << "usage: " << argv[0] << " input-file output-file in-format out-format" << std::endl;
 20 |   std::cout << std::endl;
 21 |   std::cout << "in-format:  1. SNAP format (each line is: src dst)" << std::endl;
 22 |   std::cout << "            2. SNAP with weights (src dst weight)" << std::endl;
 23 |   std::cout << "            3. Same as 1, but output edge weights of 1." << std::endl;
 24 |   std::cout << "            4. Same as 1, but output unique sequential edge weights." << std::endl;
 25 |   std::cout << "               (i.e., weights are assigned sequentially in the order" << std::endl;
 26 |   std::cout << "                of how edges are listed in the input file)" << std::endl;
 27 |   std::cout << std::endl;
 28 |   std::cout << "out-format: 1. Adjacency list format (src dst1 dst2 ...)" << std::endl;
 29 |   std::cout << "            2. Adjacency list with weights (src dst1 weight1 dst2 weight2 ...)" << std::endl;
 30 |   std::cout << "            3. JSON ([src,0,[[dst1,weight1],[dst2,weight2],...]])" << std::endl;
 31 |   std::cout << std::endl;
 32 |   std::cout << "Note: edges with the same source ID must appear in a contiguous block!" << std::endl;
 33 |   std::cout << "      e.g., 1 0  but NOT 1 0" << std::endl;
 34 |   std::cout << "            1 2          2 3" << std::endl;
 35 |   std::cout << "            2 3          1 2" << std::endl;
 36 | }
 37 | 
 38 | static inline void get_edge_weight(std::ifstream &ifs, int in_format, long &edge_weight) {
 39 |   switch (in_format) {
 40 |   case F_IN_SNAP:
 41 |     edge_weight = 0;
 42 |     break;
 43 | 
 44 |   case F_IN_SNAPWEIGHT:
 45 |     ifs >> edge_weight;
 46 |     break;
 47 | 
 48 |   case F_IN_GEN_UNITY:
 49 |     edge_weight = 1;
 50 |     break;
 51 | 
 52 |   case F_IN_GEN_SEQ:
 53 |     edge_weight = counter;
 54 |     counter++;
 55 |     break;
 56 | 
 57 |   default:
 58 |     std::cout << "Invalid in-format: " << in_format << "!" << std::endl;
 59 |   }
 60 | }
 61 | 
 62 | /**
 63 |  * Converts dataset/graph input formats.
 64 |  *
 65 |  * NOTE: Does not sort anything!
 66 |  */
 67 | int main(int argc, char **argv) {
 68 |   if ( argc < 5 ) {
 69 |     usage(argv);
 70 |     return -1;
 71 |   }
 72 | 
 73 |   std::ifstream ifs(argv[1], std::ifstream::in);
 74 |   std::ofstream ofs(argv[2], std::ofstream::out);
 75 |   int in_format = atoi(argv[3]);
 76 |   int out_format = atoi(argv[4]);
 77 | 
 78 |   if (!ifs || !ofs ||
 79 |       (in_format < F_IN_SNAP || in_format > F_IN_GEN_SEQ) ||
 80 |       (out_format < F_TO_ADJ || out_format > F_TO_JSON) ) {
 81 |     usage(argv);
 82 |     return -1;
 83 |   }
 84 | 
 85 |   std::cout.sync_with_stdio(false);    // don't flush on \n
 86 |   
 87 |   // longs, just to be safe
 88 |   long vertex_id, edge_dst, edge_weight;
 89 |   long curr_id;
 90 | 
 91 |   // first pair of reads
 92 |   ifs >> curr_id;
 93 |   ifs >> edge_dst;
 94 |   get_edge_weight(ifs, in_format, edge_weight);
 95 |   
 96 |   // NOTE: eof() DOES happen to work here, b/c inner while(ifs >> ...)
 97 |   // statement breaks when no data is left *and* this failure sets
 98 |   // EOF flag correctly & in time for eof() to see
 99 |   switch (out_format) {
100 |   case F_TO_ADJ:
101 |     while (!ifs.eof()) {
102 |       // format: vertex-id edge-dst ...
103 |       ofs << curr_id << " " << edge_dst;
104 | 
105 |       while (ifs >> vertex_id >> edge_dst) {
106 |         get_edge_weight(ifs, in_format, edge_weight);
107 |         if (vertex_id != curr_id) {
108 |           break;
109 |         }
110 | 
111 |         ofs << " " << edge_dst;
112 |       }
113 | 
114 |       ofs << "\n";
115 | 
116 |       // new vertex_id found. carry over edge_dst and edge_weight too.
117 |       curr_id = vertex_id;
118 |     }
119 |     break;
120 | 
121 |   case F_TO_ADJWEIGHT:
122 |     while (!ifs.eof()) {
123 |       // format: vertex-id edge-dst edge-val ...
124 |       ofs << curr_id << " " << edge_dst << " " << edge_weight;
125 | 
126 |       while (ifs >> vertex_id >> edge_dst) {
127 |         get_edge_weight(ifs, in_format, edge_weight);
128 |         if (vertex_id != curr_id) {
129 |           break;
130 |         }
131 | 
132 |         ofs << " " << edge_dst << " " << edge_weight;
133 |       }
134 | 
135 |       ofs << "\n";
136 | 
137 |       // new vertex_id found. carry over edge_dst and edge_weight too.
138 |       curr_id = vertex_id;
139 |     }
140 |     break;
141 | 
142 |   case F_TO_JSON:
143 |     while (!ifs.eof()) {
144 |       // format: [vertex-id, vertex-val, [[edge-dst,edge-val],...]]
145 |       ofs << "[" << curr_id << ",0,[[" << edge_dst << "," << edge_weight << "]";
146 | 
147 |       while (ifs >> vertex_id >> edge_dst) {
148 |         get_edge_weight(ifs, in_format, edge_weight);
149 |         if (vertex_id != curr_id) {
150 |           break;
151 |         }
152 | 
153 |         ofs << ",[" << edge_dst << "," << edge_weight << "]";
154 |       }
155 | 
156 |       ofs << "]]\n";
157 | 
158 |       // new vertex_id found. carry over edge_dst and edge_weight too.
159 |       curr_id = vertex_id;
160 |     }
161 |     break;
162 | 
163 |   default:
164 |     std::cout << "Invalid out-format: " << out_format << "!" << std::endl;
165 |   }
166 | 
167 |   ifs.close();
168 |   ofs.flush();
169 |   ofs.close();
170 |   return 0;
171 | }
172 | 


--------------------------------------------------------------------------------
/benchmark/datasets/snap-revert.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <fstream>
 3 | #include <iostream>
 4 | #include <string>
 5 | #include <cerrno>
 6 | 
 7 | #define F_IN_ADJ         1
 8 | #define F_IN_ADJWEIGHT   2
 9 | 
10 | #define F_TO_SNAP        1
11 | #define F_TO_SNAPWEIGHT  2
12 | 
13 | static void usage(char **argv) {
14 |   std::cout << "usage: " << argv[0] << " input-file output-file in-format out-format" << std::endl;
15 |   std::cout << std::endl;
16 |   std::cout << "in-format:  1. Adjacency list format (src dst1 dst2 ...)" << std::endl;
17 |   std::cout << "            2. Adjacency list with weights (src dst1 weight1 dst2 weight2 ...)" << std::endl;
18 |   std::cout << std::endl;
19 |   std::cout << "out-format: 1. SNAP format (src dst)" << std::endl;
20 |   std::cout << "            2. SNAP with weights (src dst weight)" << std::endl;
21 | }
22 | 
23 | 
24 | static inline void write_output(std::ofstream &ofs, int out_format,
25 |                                 long vertex_id, long edge_dst, long edge_weight) {
26 |   switch(out_format) {
27 |   case F_TO_SNAP:
28 |     ofs << vertex_id << " " << edge_dst << "\n";
29 |     break;
30 | 
31 |   case F_TO_SNAPWEIGHT:
32 |     ofs << vertex_id << " " << edge_dst << " " << edge_weight << "\n";
33 |     break;
34 | 
35 |   default:
36 |     std::cout << "Invalid out-format: " << out_format << "!" << std::endl;
37 |   }
38 | }
39 | 
40 | 
41 | /**
42 |  * Converts adjacency format to SNAP.
43 |  *
44 |  * NOTE: Does not sort anything!
45 |  */
46 | int main(int argc, char **argv) {
47 |   if ( argc < 5 ) {
48 |     usage(argv);
49 |     return -1;
50 |   }
51 | 
52 |   std::ifstream ifs(argv[1], std::ifstream::in);
53 |   std::ofstream ofs(argv[2], std::ofstream::out);
54 |   int in_format = atoi(argv[3]);
55 |   int out_format = atoi(argv[4]);
56 | 
57 |   if (!ifs || !ofs ||
58 |       (in_format < F_IN_ADJ || in_format > F_IN_ADJWEIGHT) ||
59 |       (out_format < F_TO_SNAP || in_format > F_TO_SNAPWEIGHT)) {
60 |     usage(argv);
61 |     return -1;
62 |   }
63 |   
64 |   std::cout.sync_with_stdio(false);    // don't flush on \n
65 | 
66 |   // longs, just to be safe
67 |   long vertex_id, edge_dst, edge_weight;
68 | 
69 |   switch (in_format) {
70 |   case F_IN_ADJ:
71 |     while (ifs >> vertex_id) {
72 |       while ( (ifs.peek() != '\n') && (ifs >> edge_dst) ) {
73 |         write_output(ofs, out_format, vertex_id, edge_dst, 0);
74 |       }
75 |     }
76 |     break;
77 | 
78 |   case F_IN_ADJWEIGHT:
79 |     while (ifs >> vertex_id) {
80 |       while ( (ifs.peek() != '\n') && (ifs >> edge_dst && ifs >> edge_weight) ) {
81 |         write_output(ofs, out_format, vertex_id, edge_dst, edge_weight);
82 |       }
83 |     }
84 |     break;
85 | 
86 |   default:
87 |     std::cout << "Invalid in-format: " << in_format << "!" << std::endl;
88 |   }
89 | 
90 |   ifs.close();
91 |   ofs.flush();
92 |   ofs.close();
93 |   return 0;
94 | }
95 | 


--------------------------------------------------------------------------------
/benchmark/datasets/split-input.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Split given input-graph into parts, placed in input-graph-split/
 4 | 
 5 | if [ $# -ne 2 ]; then
 6 |     echo "usage: $0 input-graph num-splits"
 7 |     exit -1
 8 | fi
 9 | 
10 | graph=$(echo "$1" | sed 's/.txt$//g')
11 | numsplits=$2
12 | 
13 | if [[ ! -f "${graph}.txt" ]]; then
14 |     echo "${graph}.txt does not exist."
15 |     exit -1
16 | fi
17 | 
18 | if [[ $2 -le 0 ]]; then
19 |     echo "Invalid number of chunks."
20 |     exit -1
21 | fi
22 | 
23 | if [[ -d "${graph}-split" ]]; then
24 |     echo "${graph}-split/ already exists. Delete it first."
25 |     exit -1
26 | fi
27 | 
28 | # split input into specified chunks
29 | mkdir "${graph}-split"
30 | 
31 | echo "Splitting ${graph}.txt..."
32 | split "${graph}.txt" "${graph}-split/${graph}-" -n l/${numsplits}
33 | 
34 | echo "Done!"


--------------------------------------------------------------------------------
/benchmark/giraph/benchall.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | if [ $# -ne 2 ]; then
  4 |     echo "usage: $0 machines runs"
  5 |     echo ""
  6 |     echo "machines: 4, 8, 16, 32, 64, or 128"
  7 |     exit -1
  8 | fi
  9 | 
 10 | cd "$(dirname "${BASH_SOURCE[0]}")"
 11 | 
 12 | MACHINES=$1
 13 | RUNS=$2
 14 | 
 15 | case ${MACHINES} in
 16 |     4)   GRAPHS=(amazon google patents);
 17 |          GRAPHS_MST=(amazon google patents);
 18 |          GRAPHS_MST_HASH=(amazon google patents);
 19 |          SRC=(0 0 6009554);;  # for SSSP
 20 |     8)   GRAPHS=(amazon google patents);
 21 |          GRAPHS_MST=(amazon google patents);
 22 |          GRAPHS_MST_HASH=(amazon google patents);
 23 |          SRC=(0 0 6009554);;
 24 |     16)  GRAPHS=(livejournal orkut arabic twitter);
 25 |          GRAPHS_MST=(livejournal orkut arabic);
 26 |          GRAPHS_MST_HASH=(livejournal orkut);
 27 |          SRC=(0 1 3 0);;
 28 |     32)  GRAPHS=(livejournal orkut arabic twitter);
 29 |          GRAPHS_MST=(livejournal orkut arabic);
 30 |          GRAPHS_MST_HASH=(livejournal orkut arabic);
 31 |          SRC=(0 1 3 0);;
 32 |     64)  GRAPHS=(livejournal orkut arabic twitter uk0705);
 33 |          GRAPHS_MST=(livejournal orkut arabic);
 34 |          GRAPHS_MST_HASH=(livejournal orkut arabic twitter);
 35 |          SRC=(0 1 3 0 0);;
 36 |     128) GRAPHS=(livejournal orkut arabic twitter uk0705);
 37 |          GRAPHS_MST=(livejournal orkut arabic uk0705);
 38 |          GRAPHS_MST_HASH=(livejournal orkut arabic twitter);
 39 |          SRC=(0 1 3 0 0);;
 40 |     *) echo "Invalid machines"; exit -1;;
 41 | esac
 42 | 
 43 | ##################
 44 | # Byte array run
 45 | ##################
 46 | # we split the algs up for clarity
 47 | for graph in "${GRAPHS[@]}"; do
 48 |     for ((i = 1; i <= RUNS; i++)); do
 49 |         ./pagerank.sh "${graph}-adj.txt" ${MACHINES} 0
 50 |     done
 51 | done
 52 | 
 53 | for j in "${!GRAPHS[@]}"; do
 54 |     for ((i = 1; i <= RUNS; i++)); do
 55 |         ./sssp.sh "${GRAPHS[$j]}-adj.txt" ${MACHINES} 0 ${SRC[$j]}
 56 |     done
 57 | done
 58 | 
 59 | for graph in "${GRAPHS[@]}"; do
 60 |     for ((i = 1; i <= RUNS; i++)); do
 61 |         ./wcc.sh "${graph}-adj.txt" ${MACHINES} 0
 62 |     done
 63 | done
 64 | 
 65 | # WARNING: this can be VERY slow for large graphs!!
 66 | for graph in "${GRAPHS_MST[@]}"; do
 67 |     for ((i = 1; i <= RUNS; i++)); do
 68 |         ./mst.sh "${graph}-mst-adj.txt" ${MACHINES} 0
 69 |     done
 70 | done
 71 | 
 72 | #for graph in "${GRAPHS[@]}"; do
 73 | #    for ((i = 1; i <= RUNS; i++)); do
 74 | #        ./dimest.sh "${graph}-adj.txt" ${MACHINES} 0
 75 | #    done
 76 | #done
 77 | 
 78 | 
 79 | #####################
 80 | # Hash map run
 81 | #####################
 82 | for graph in "${GRAPHS[@]}"; do
 83 |     for ((i = 1; i <= RUNS; i++)); do
 84 |         ./pagerank.sh "${graph}-adj.txt" ${MACHINES} 1
 85 |     done
 86 | done
 87 | 
 88 | for j in "${!GRAPHS[@]}"; do
 89 |     for ((i = 1; i <= RUNS; i++)); do
 90 |         ./sssp.sh "${GRAPHS[$j]}-adj.txt" ${MACHINES} 1 ${SRC[$j]}
 91 |     done
 92 | done
 93 | 
 94 | for graph in "${GRAPHS[@]}"; do
 95 |     for ((i = 1; i <= RUNS; i++)); do
 96 |         ./wcc.sh "${graph}-adj.txt" ${MACHINES} 1
 97 |     done
 98 | done
 99 | 
100 | for graph in "${GRAPHS_MST_HASH[@]}"; do
101 |     for ((i = 1; i <= RUNS; i++)); do
102 |         ./mst.sh "${graph}-mst-adj.txt" ${MACHINES} 1
103 |     done
104 | done
105 | 
106 | #for graph in "${GRAPHS[@]}"; do
107 | #    for ((i = 1; i <= RUNS; i++)); do
108 | #        ./dimest.sh "${graph}-adj.txt" ${MACHINES} 1
109 | #    done
110 | #done


--------------------------------------------------------------------------------
/benchmark/giraph/dimest.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines edge-type"
 5 |     echo ""
 6 |     echo "edge-type: 0 for byte array edges"
 7 |     echo "           1 for hash map edges"
 8 |     exit -1
 9 | fi
10 | 
11 | source ../common/get-dirs.sh
12 | source ../common/get-configs.sh
13 | 
14 | # place input in /user/${USER}/input/
15 | # output is in /user/${USER}/giraph-output/
16 | inputgraph=$(basename $1)
17 | outputdir=/user/${USER}/giraph-output/
18 | hadoop dfs -rmr "$outputdir" || true
19 | 
20 | # Technically this is the number of "workers", which can be more
21 | # than the number of machines. However, using multiple workers per
22 | # machine is inefficient! Use more Giraph threads instead (see below).
23 | machines=$2    
24 | 
25 | edgetype=$3
26 | case ${edgetype} in
27 |     0) edgeclass="";;     # byte array edges are used by default
28 |     1) edgeclass="-Dgiraph.inputOutEdgesClass=org.apache.giraph.edge.HashMapEdges \
29 |                   -Dgiraph.outEdgesClass=org.apache.giraph.edge.HashMapEdges";;
30 |     *) echo "Invalid edge-type"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=dimest_${inputgraph}_${machines}_${edgetype}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # running time
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | hadoop jar "$GIRAPH_DIR"/giraph-examples/target/giraph-examples-1.0.0-for-hadoop-1.0.2-jar-with-dependencies.jar org.apache.giraph.GiraphRunner \
43 |     ${edgeclass} \
44 |     -Dgiraph.numComputeThreads=${GIRAPH_THREADS} \
45 |     -Dgiraph.numInputThreads=${GIRAPH_THREADS} \
46 |     -Dgiraph.numOutputThreads=${GIRAPH_THREADS} \
47 |     org.apache.giraph.examples.DiameterEstimationVertex \
48 |     -ca DiameterEstimationVertex.maxSS=30 \
49 |     -vif org.apache.giraph.examples.DiameterEstimationInputFormat \
50 |     -vip /user/${USER}/input/${inputgraph} \
51 |     -of org.apache.giraph.examples.DiameterEstimationVertex\$DiameterEstimationVertexOutputFormat \
52 |     -op "$outputdir" \
53 |     -w ${machines} 2>&1 | tee -a ./logs/${logfile}
54 | 
55 | ## finish logging memory + network usage
56 | ../common/bench-finish.sh ${logname}
57 | 
58 | ## clean up step needed for Giraph
59 | ./kill-java-job.sh


--------------------------------------------------------------------------------
/benchmark/giraph/kill-java-job.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Kill all Java instances corresponding to Giraph jobs.
 4 | # This is needed as they don't terminate automatically (they hang around consuming memory).
 5 | #
 6 | # NOTE: this will kill ALL jobs, including ongoing ones!
 7 | #
 8 | # To get rid of terminated running jobs from Hadoop web interface,
 9 | # use "hadoop job -kill job_yyyymmddhhmm_aaaa"
10 | 
11 | source "$(dirname "${BASH_SOURCE[0]}")"/../common/get-hosts.sh
12 | 
13 | # do a kill on the master separately---this is useful when testing on a single machine
14 | kill -9 $(ps aux | grep "[j]obcache/job_[0-9]\{12\}_[0-9]\{4\}/" | awk '{print $2}')
15 | 
16 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
17 |     # [j] is a nifty trick to avoid "grep" showing up as a result
18 |     ssh ${CLUSTER_NAME}$i "kill -9 \$(ps aux | grep \"[j]obcache/job_[0-9]\{12\}_[0-9]\{4\}/\" | awk '{print \$2}')" &
19 | done
20 | wait


--------------------------------------------------------------------------------
/benchmark/giraph/mst.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines edge-type"
 5 |     echo ""
 6 |     echo "edge-type: 0 for byte array edges"
 7 |     echo "           1 for hash map edges"
 8 |     exit -1
 9 | fi
10 | 
11 | source ../common/get-dirs.sh
12 | source ../common/get-configs.sh
13 | 
14 | # place input in /user/${USER}/input/
15 | # output is in /user/${USER}/giraph-output/
16 | inputgraph=$(basename $1)
17 | outputdir=/user/${USER}/giraph-output/
18 | hadoop dfs -rmr "$outputdir" || true
19 | 
20 | # Technically this is the number of "workers", which can be more
21 | # than the number of machines. However, using multiple workers per
22 | # machine is inefficient! Use more Giraph threads instead (see below).
23 | machines=$2
24 | 
25 | edgetype=$3
26 | case ${edgetype} in
27 |     0) edgeclass="";;     # byte array edges are used by default
28 |     1) edgeclass="-Dgiraph.inputOutEdgesClass=org.apache.giraph.edge.HashMapEdges \
29 |                   -Dgiraph.outEdgesClass=org.apache.giraph.edge.HashMapEdges";;
30 |     *) echo "Invalid edge-type"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=mst_${inputgraph}_${machines}_${edgetype}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # running time
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | # -Dmapred.task.timeout=0 is needed to prevent Giraph job from getting killed after spending 10 mins on one superstep
43 | # Giraph seems to ignore any mapred.task.timeout specified in Hadoop's mapred-site.xml
44 | hadoop jar "$GIRAPH_DIR"/giraph-examples/target/giraph-examples-1.0.0-for-hadoop-1.0.2-jar-with-dependencies.jar org.apache.giraph.GiraphRunner \
45 |     ${edgeclass} \
46 |     -Dgiraph.numComputeThreads=${GIRAPH_THREADS} \
47 |     -Dgiraph.numInputThreads=${GIRAPH_THREADS} \
48 |     -Dgiraph.numOutputThreads=${GIRAPH_THREADS} \
49 |     -Dmapred.task.timeout=0 \
50 |     org.apache.giraph.examples.MinimumSpanningTreeVertex \
51 |     -mc org.apache.giraph.examples.MinimumSpanningTreeVertex\$MinimumSpanningTreeVertexMasterCompute \
52 |     -vif org.apache.giraph.examples.MinimumSpanningTreeInputFormat \
53 |     -vip /user/${USER}/input/${inputgraph} \
54 |     -of org.apache.giraph.examples.MinimumSpanningTreeVertex\$MinimumSpanningTreeVertexOutputFormat \
55 |     -op "$outputdir" \
56 |     -w ${machines} 2>&1 | tee -a ./logs/${logfile}
57 | 
58 | # -wc org.apache.giraph.examples.MinimumSpanningTreeVertex\$MinimumSpanningTreeVertexWorkerContext
59 | # see giraph-core/.../utils/ConfigurationUtils.java for command line opts (or -h flag to GiraphRunner)
60 | 
61 | ## finish logging memory + network usage
62 | ../common/bench-finish.sh ${logname}
63 | 
64 | ## clean up step needed for Giraph
65 | ./kill-java-job.sh


--------------------------------------------------------------------------------
/benchmark/giraph/pagerank.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines edge-type"
 5 |     echo ""
 6 |     echo "edge-type: 0 for byte array edges"
 7 |     echo "           1 for hash map edges"
 8 |     exit -1
 9 | fi
10 | 
11 | source ../common/get-dirs.sh
12 | source ../common/get-configs.sh
13 | 
14 | # place input in /user/${USER}/input/
15 | # output is in /user/${USER}/giraph-output/
16 | inputgraph=$(basename $1)
17 | outputdir=/user/${USER}/giraph-output/
18 | hadoop dfs -rmr "$outputdir" || true
19 | 
20 | # Technically this is the number of "workers", which can be more
21 | # than the number of machines. However, using multiple workers per
22 | # machine is inefficient! Use more Giraph threads instead (see below).
23 | machines=$2
24 | 
25 | edgetype=$3
26 | case ${edgetype} in
27 |     0) edgeclass="";;     # byte array edges are used by default
28 |     1) edgeclass="-Dgiraph.inputOutEdgesClass=org.apache.giraph.edge.HashMapEdges \
29 |                   -Dgiraph.outEdgesClass=org.apache.giraph.edge.HashMapEdges";;
30 |     *) echo "Invalid edge-type"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=pagerank_${inputgraph}_${machines}_${edgetype}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # running time
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | hadoop jar "$GIRAPH_DIR"/giraph-examples/target/giraph-examples-1.0.0-for-hadoop-1.0.2-jar-with-dependencies.jar org.apache.giraph.GiraphRunner \
43 |     ${edgeclass} \
44 |     -Dgiraph.numComputeThreads=${GIRAPH_THREADS} \
45 |     -Dgiraph.numInputThreads=${GIRAPH_THREADS} \
46 |     -Dgiraph.numOutputThreads=${GIRAPH_THREADS} \
47 |     org.apache.giraph.examples.SimplePageRankVertex \
48 |     -c org.apache.giraph.combiner.DoubleSumCombiner \
49 |     -ca SimplePageRankVertex.maxSS=30 \
50 |     -vif org.apache.giraph.examples.SimplePageRankInputFormat \
51 |     -vip /user/${USER}/input/${inputgraph} \
52 |     -of org.apache.giraph.examples.SimplePageRankVertex\$SimplePageRankVertexOutputFormat \
53 |     -op "$outputdir" \
54 |     -w ${machines} 2>&1 | tee -a ./logs/${logfile}
55 | 
56 | # mc not needed b/c we don't want aggregators: -mc org.apache.giraph.examples.SimplePageRankVertex\$SimplePageRankVertexMasterCompute 
57 | # alternative output format: -of org.apache.giraph.io.formats.IdWithValueTextOutputFormat 
58 | 
59 | ## finish logging memory + network usage
60 | ../common/bench-finish.sh ${logname}
61 | 
62 | ## clean up step needed for Giraph
63 | ./kill-java-job.sh


--------------------------------------------------------------------------------
/benchmark/giraph/prtolfinder.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 2 ]; then
 4 |     echo "usage: $0 input-graph machines"
 5 |     exit -1
 6 | fi
 7 | 
 8 | source ../common/get-dirs.sh
 9 | source ../common/get-hosts.sh
10 | source ../common/get-configs.sh
11 | 
12 | # place input in /user/${USER}/input/
13 | # output is in /user/${USER}/giraph-output/
14 | inputgraph=$(basename $1)
15 | outputdir=/user/${USER}/giraph-output/
16 | hadoop dfs -rmr "$outputdir" || true
17 | 
18 | # Technically this is the number of "workers", which can be more
19 | # than the number of machines. However, using multiple workers per
20 | # machine is inefficient! Use more Giraph threads instead (see below).
21 | machines=$2
22 | 
23 | ## log names
24 | logname=prtolfinder_${inputgraph}_${machines}_0_"$(date +%Y%m%d-%H%M%S)"
25 | logfile=${logname}_time.txt       # running time
26 | 
27 | 
28 | ## start logging memory + network usage
29 | #../common/bench-init.sh ${logname}
30 | 
31 | ## start algorithm run
32 | # we use default byte array edges (better performance)
33 | # NOTE: this outputs no data to HDFS
34 | hadoop jar "$GIRAPH_DIR"/giraph-examples/target/giraph-examples-1.0.0-for-hadoop-1.0.2-jar-with-dependencies.jar org.apache.giraph.GiraphRunner \
35 |     -Dgiraph.numComputeThreads=${GIRAPH_THREADS} \
36 |     -Dgiraph.numInputThreads=${GIRAPH_THREADS} \
37 |     -Dgiraph.numOutputThreads=${GIRAPH_THREADS} \
38 |     org.apache.giraph.examples.PageRankTolFinderVertex \
39 |     -mc org.apache.giraph.examples.PageRankTolFinderVertex\$PageRankTolFinderVertexMasterCompute \
40 |     -c org.apache.giraph.combiner.DoubleSumCombiner \
41 |     -ca PageRankTolFinderVertex.maxSS=30 \
42 |     -vif org.apache.giraph.examples.SimplePageRankInputFormat \
43 |     -vip /user/${USER}/input/${inputgraph} \
44 |     -of org.apache.giraph.examples.PageRankTolFinderVertex\$PageRankTolFinderVertexOutputFormat \
45 |     -op "$outputdir" \
46 |     -w ${machines} 2>&1 | tee -a ./logs/${logfile}
47 | 
48 | # -wc org.apache.giraph.examples.PageRankTolFinderVertex\$PageRankTolFinderVertexWorkerContext
49 | 
50 | ## finish logging memory + network usage
51 | #../common/bench-finish.sh ${logname}
52 | 
53 | 
54 | ## get max deltas (changes in PR value) at each superstep
55 | jobid=$(grep "Running job" ./logs/${logfile} | awk '{print $7}')
56 | 
57 | # The master on a cluster will not have anything---this is for local testing
58 | darray[0]=$(cat "$HADOOP_DIR"/logs/userlogs/${jobid}/*/syslog | grep 'max change' | awk '{print $9}' | tr '\n' ' ')
59 | 
60 | # NOTE: this is a hack---ZK is located on one of the workers, so just go
61 | # through everyone and we'll get master.compute()'s output exactly once
62 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
63 |     darray[${i}]=$(ssh ${CLUSTER_NAME}${i} "cat \"$HADOOP_DIR\"/logs/userlogs/${jobid}/*/syslog | grep 'max change' | awk '{print \$9}' | tr '\n' ','")
64 | done
65 | 
66 | deltas=$(echo "${darray[*]}" | sed -e 's/^ *//' -e 's/ *$//')  # join array and strip whitespace
67 | 
68 | echo "" >> ./tolerances.txt
69 | echo "$(sed 's/-.*//g' <<< ${inputgraph})_deltas = [${deltas}]" >> ./tolerances.txt
70 | 
71 | ## clean up step needed for Giraph
72 | ./kill-java-job.sh


--------------------------------------------------------------------------------
/benchmark/giraph/recompile-giraph.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | commondir=$(dirname "${BASH_SOURCE[0]}")/../common
 4 | source "$commondir"/get-hosts.sh
 5 | source "$commondir"/get-dirs.sh
 6 | 
 7 | cd "$GIRAPH_DIR"
 8 | 
 9 | # -pl specifies what packages to compile (e.g., giraph-examples,giraph-core)
10 | # -Dfindbugs.skip skips "find bugs" stage (saves quite a bit of time)
11 | mvn clean install -Phadoop_1.0 -DskipTests -pl giraph-examples -Dfindbugs.skip
12 | 
13 | # copy compiled jars to worker machines
14 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
15 |     scp ./giraph-examples/target/*.jar ${CLUSTER_NAME}${i}:"$GIRAPH_DIR"/giraph-examples/target/ &
16 |     scp ./giraph-core/target/*.jar ${CLUSTER_NAME}${i}:"$GIRAPH_DIR"/giraph-core/target/ &
17 | done
18 | wait
19 | 
20 | echo "OK."


--------------------------------------------------------------------------------
/benchmark/giraph/sssp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 4 ]; then
 4 |     echo "usage: $0 input-graph machines edge-type source-vertex"
 5 |     echo ""
 6 |     echo "edge-type: 0 for byte array edges"
 7 |     echo "           1 for hash map edges"
 8 |     exit -1
 9 | fi
10 | 
11 | source ../common/get-dirs.sh
12 | source ../common/get-configs.sh
13 | 
14 | # place input in /user/${USER}/input/
15 | # output is in /user/${USER}/giraph-output/
16 | inputgraph=$(basename $1)
17 | outputdir=/user/${USER}/giraph-output/
18 | hadoop dfs -rmr "$outputdir" || true
19 | 
20 | # Technically this is the number of "workers", which can be more
21 | # than the number of machines. However, using multiple workers per
22 | # machine is inefficient! Use more Giraph threads instead (see below).
23 | machines=$2
24 | 
25 | edgetype=$3
26 | case ${edgetype} in
27 |     0) edgeclass="";;     # byte array edges are used by default
28 |     1) edgeclass="-Dgiraph.inputOutEdgesClass=org.apache.giraph.edge.HashMapEdges \
29 |                   -Dgiraph.outEdgesClass=org.apache.giraph.edge.HashMapEdges";;
30 |     *) echo "Invalid edge-type"; exit -1;;
31 | esac
32 | 
33 | src=$4
34 | 
35 | ## log names
36 | logname=sssp_${inputgraph}_${machines}_${edgetype}_"$(date +%Y%m%d-%H%M%S)"
37 | logfile=${logname}_time.txt       # running time
38 | 
39 | 
40 | ## start logging memory + network usage
41 | ../common/bench-init.sh ${logname}
42 | 
43 | ## start algorithm run
44 | hadoop jar "$GIRAPH_DIR"/giraph-examples/target/giraph-examples-1.0.0-for-hadoop-1.0.2-jar-with-dependencies.jar org.apache.giraph.GiraphRunner \
45 |     ${edgeclass} \
46 |     -Dgiraph.numComputeThreads=${GIRAPH_THREADS} \
47 |     -Dgiraph.numInputThreads=${GIRAPH_THREADS} \
48 |     -Dgiraph.numOutputThreads=${GIRAPH_THREADS} \
49 |     org.apache.giraph.examples.SimpleShortestPathsVertex \
50 |     -ca SimpleShortestPathsVertex.sourceId=${src} \
51 |     -vif org.apache.giraph.examples.SimpleShortestPathsInputFormat \
52 |     -vip /user/${USER}/input/${inputgraph} \
53 |     -of org.apache.giraph.io.formats.IdWithValueTextOutputFormat \
54 |     -op "$outputdir" \
55 |     -w ${machines} 2>&1 | tee -a ./logs/${logfile}
56 | 
57 | ## finish logging memory + network usage
58 | ../common/bench-finish.sh ${logname}
59 | 
60 | ## clean up step needed for Giraph
61 | ./kill-java-job.sh


--------------------------------------------------------------------------------
/benchmark/giraph/wcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines edge-type"
 5 |     echo ""
 6 |     echo "edge-type: 0 for byte array edges"
 7 |     echo "           1 for hash map edges"
 8 |     exit -1
 9 | fi
10 | 
11 | source ../common/get-dirs.sh
12 | source ../common/get-configs.sh
13 | 
14 | # place input in /user/${USER}/input/
15 | # output is in /user/${USER}/giraph-output/
16 | inputgraph=$(basename $1)
17 | outputdir=/user/${USER}/giraph-output/
18 | hadoop dfs -rmr "$outputdir" || true
19 | 
20 | # Technically this is the number of "workers", which can be more
21 | # than the number of machines. However, using multiple workers per
22 | # machine is inefficient! Use more Giraph threads instead (see below).
23 | machines=$2
24 | 
25 | edgetype=$3
26 | case ${edgetype} in
27 |     0) edgeclass="";;     # byte array edges are used by default
28 |     1) edgeclass="-Dgiraph.inputOutEdgesClass=org.apache.giraph.edge.HashMapEdges \
29 |                   -Dgiraph.outEdgesClass=org.apache.giraph.edge.HashMapEdges";;
30 |     *) echo "Invalid edge-type"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=wcc_${inputgraph}_${machines}_${edgetype}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # running time
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | hadoop jar "$GIRAPH_DIR"/giraph-examples/target/giraph-examples-1.0.0-for-hadoop-1.0.2-jar-with-dependencies.jar org.apache.giraph.GiraphRunner \
43 |     ${edgeclass} \
44 |     -Dgiraph.numComputeThreads=${GIRAPH_THREADS} \
45 |     -Dgiraph.numInputThreads=${GIRAPH_THREADS} \
46 |     -Dgiraph.numOutputThreads=${GIRAPH_THREADS} \
47 |     org.apache.giraph.examples.ConnectedComponentsVertex \
48 |     -vif org.apache.giraph.examples.ConnectedComponentsInputFormat \
49 |     -vip /user/${USER}/input/${inputgraph} \
50 |     -of org.apache.giraph.io.formats.IdWithValueTextOutputFormat \
51 |     -op "$outputdir" \
52 |     -w ${machines} 2>&1 | tee -a ./logs/${logfile}
53 | 
54 | ## finish logging memory + network usage
55 | ../common/bench-finish.sh ${logname}
56 | 
57 | ## clean up step needed for Giraph
58 | ./kill-java-job.sh


--------------------------------------------------------------------------------
/benchmark/gps/benchall.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | if [ $# -ne 2 ]; then
  4 |     echo "usage: $0 machines runs"
  5 |     echo ""
  6 |     echo "machines: 4, 8, 16, 32, 64, or 128"
  7 |     exit -1
  8 | fi
  9 | 
 10 | cd "$(dirname "${BASH_SOURCE[0]}")"
 11 | 
 12 | MACHINES=$1
 13 | RUNS=$2
 14 | 
 15 | case ${MACHINES} in
 16 |     4)   GRAPHS=(amazon google patents);
 17 |          GRAPHS_MST=(amazon google patents);
 18 |          SRC=(0 0 6009554);  # for SSSP
 19 |          SLEEP_TIME=60;;
 20 |     8)   GRAPHS=(amazon google patents);
 21 |          GRAPHS_MST=(amazon google patents);
 22 |          SRC=(0 0 6009554);
 23 |          SLEEP_TIME=60;;
 24 |     16)  GRAPHS=(livejournal orkut arabic twitter);
 25 |          GRAPHS_MST=(livejournal orkut arabic);
 26 |          SRC=(0 1 3 0);
 27 |          SLEEP_TIME=60;;
 28 |     32)  GRAPHS=(livejournal orkut arabic twitter);
 29 |          GRAPHS_MST=(livejournal orkut arabic);
 30 |          SRC=(0 1 3 0);
 31 |          SLEEP_TIME=60;;
 32 |     64)  GRAPHS=(livejournal orkut arabic twitter uk0705);
 33 |          GRAPHS_MST=(livejournal orkut arabic twitter);
 34 |          SRC=(0 1 3 0 0);
 35 |          SLEEP_TIME=80;;
 36 |     128) GRAPHS=(livejournal orkut arabic twitter uk0705);
 37 |          GRAPHS_MST=(livejournal orkut arabic twitter uk0705);
 38 |          SRC=(0 1 3 0 0);
 39 |          SLEEP_TIME=80;;
 40 |     *) echo "Invalid machines"; exit -1;;
 41 | esac
 42 | 
 43 | #################
 44 | # Normal run
 45 | #################
 46 | # we split the algs up for simplicity
 47 | for graph in "${GRAPHS[@]}"; do
 48 |     for ((i = 1; i <= RUNS; i++)); do
 49 |         ./pagerank.sh "${graph}-adj.txt" ${MACHINES} 0
 50 |         ./stop-nodes.sh
 51 |         sleep ${SLEEP_TIME}
 52 |     done
 53 | done
 54 |  
 55 | for j in "${!GRAPHS[@]}"; do
 56 |     for ((i = 1; i <= RUNS; i++)); do
 57 |         ./sssp.sh "${GRAPHS[$j]}-adj.txt" ${MACHINES} 0 ${SRC[$j]}
 58 |         ./stop-nodes.sh
 59 |         sleep ${SLEEP_TIME}
 60 |     done
 61 | done
 62 |  
 63 | for graph in "${GRAPHS[@]}"; do
 64 |     for ((i = 1; i <= RUNS; i++)); do
 65 |         ./wcc.sh "${graph}-adj.txt" ${MACHINES} 0
 66 |         ./stop-nodes.sh
 67 |         sleep ${SLEEP_TIME}
 68 |     done
 69 | done
 70 | 
 71 | for graph in "${GRAPHS_MST[@]}"; do
 72 |     for ((i = 1; i <= RUNS; i++)); do
 73 |         ./mst.sh "${graph}-mst-adj.txt" ${MACHINES}
 74 |         ./stop-nodes.sh
 75 |         sleep ${SLEEP_TIME}
 76 |     done
 77 | done
 78 | 
 79 | #./enable-dimest-fix.sh
 80 | #for graph in "${GRAPHS[@]}"; do
 81 | #    for ((i = 1; i <= RUNS; i++)); do
 82 | #        ./dimest.sh "${graph}-adj.txt" ${MACHINES} 0
 83 | #        ./stop-nodes.sh
 84 | #        sleep ${SLEEP_TIME}
 85 | #    done
 86 | #done
 87 | #./disable-dimest-fix.sh
 88 | 
 89 | #################
 90 | # LALP Run
 91 | #################
 92 | for graph in "${GRAPHS[@]}"; do
 93 |     for ((i = 1; i <= RUNS; i++)); do
 94 |         ./pagerank.sh "${graph}-adj.txt" ${MACHINES} 1
 95 |         ./stop-nodes.sh
 96 |         sleep ${SLEEP_TIME}
 97 |     done
 98 | done
 99 |  
100 | for j in "${!GRAPHS[@]}"; do
101 |     for ((i = 1; i <= RUNS; i++)); do
102 |         ./sssp.sh "${GRAPHS[$j]}-adj.txt" ${MACHINES} 1 ${SRC[$j]}
103 |         ./stop-nodes.sh
104 |         sleep ${SLEEP_TIME}
105 |     done
106 | done
107 |  
108 | for graph in "${GRAPHS[@]}"; do
109 |     for ((i = 1; i <= RUNS; i++)); do
110 |         ./wcc.sh "${graph}-adj.txt" ${MACHINES} 1
111 |         ./stop-nodes.sh
112 |         sleep ${SLEEP_TIME}
113 |     done
114 | done
115 | 
116 | # no MST
117 | 
118 | #./enable-dimest-fix.sh
119 | #for graph in "${GRAPHS[@]}"; do
120 | #    for ((i = 1; i <= RUNS; i++)); do
121 | #        ./dimest.sh "${graph}-adj.txt" ${MACHINES} 0
122 | #        ./stop-nodes.sh
123 | #        sleep ${SLEEP_TIME}
124 | #    done
125 | #done
126 | #./disable-dimest-fix.sh
127 | 
128 | #################
129 | # Dynamic Run
130 | #################
131 | for graph in "${GRAPHS[@]}"; do
132 |     for ((i = 1; i <= RUNS; i++)); do
133 |         ./pagerank.sh "${graph}-adj.txt" ${MACHINES} 2
134 |         ./stop-nodes.sh
135 |         sleep ${SLEEP_TIME}
136 |     done
137 | done
138 |  
139 | for j in "${!GRAPHS[@]}"; do
140 |     for ((i = 1; i <= RUNS; i++)); do
141 |         ./sssp.sh "${GRAPHS[$j]}-adj.txt" ${MACHINES} 2 ${SRC[$j]}
142 |         ./stop-nodes.sh
143 |         sleep ${SLEEP_TIME}
144 |     done
145 | done
146 |  
147 | for graph in "${GRAPHS[@]}"; do
148 |     for ((i = 1; i <= RUNS; i++)); do
149 |         ./wcc.sh "${graph}-adj.txt" ${MACHINES} 2
150 |         ./stop-nodes.sh
151 |         sleep ${SLEEP_TIME}
152 |     done
153 | done
154 | 
155 | # no MST
156 | 
157 | #./enable-dimest-fix.sh
158 | #for graph in "${GRAPHS[@]}"; do
159 | #    for ((i = 1; i <= RUNS; i++)); do
160 | #        ./dimest.sh "${graph}-adj.txt" ${MACHINES} 0
161 | #        ./stop-nodes.sh
162 | #        sleep ${SLEEP_TIME}
163 | #    done
164 | #done
165 | #./disable-dimest-fix.sh


--------------------------------------------------------------------------------
/benchmark/gps/debug-site.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | 
3 | # This runs GPS's web interface to view old runs/logs.
4 | #
5 | # NOTE: Compile debug_monitoring_runner.jar using $GPS_DIR/make_debug_monitoring_runner_jar.sh
6 | 
7 | source "$(dirname "${BASH_SOURCE[0]}")"/../common/get-dirs.sh
8 | 
9 | java -jar "$GPS_DIR"/debug_monitoring_runner.jar -hcf "$HADOOP_DIR"/conf/core-site.xml -msfp /user/${USER}/gps/stats-* -port 4444


--------------------------------------------------------------------------------
/benchmark/gps/dimest.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines gps-mode"
 5 |     echo ""
 6 |     echo "gps-mode: 0 for normal (no lalp, no dynamic repartitioning)"
 7 |     echo "          1 for LALP"
 8 |     echo "          2 for dynamic repartitioning"
 9 |     echo "          3 for LALP and dynamic repartitioning"
10 |     exit -1
11 | fi
12 | 
13 | source ../common/get-dirs.sh
14 | source ../common/get-configs.sh
15 | 
16 | # place input in /user/${USER}/input/
17 | # output is in /user/${USER}/gps/output/
18 | inputgraph=$(basename $1)
19 | 
20 | # machines should be number of EC2 instances
21 | machines=$2
22 | workers=$(($machines * $GPS_WPM))
23 | 
24 | mode=$3
25 | case ${mode} in
26 |     0) modeflag="";;
27 |     1) modeflag="-lalp 100";;
28 |     2) modeflag="-dynamic";;
29 |     3) modeflag="-lalp 100 -dynamic";;
30 |     *) echo "Invalid gps-mode"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=dimest_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # GPS statistics (incl running time)
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | # max controls max number of supersteps
43 | ./start-nodes.sh ${workers} quick-start \
44 |     ${modeflag} \
45 |     -ifs /user/${USER}/input/${inputgraph} \
46 |     -hcf "$HADOOP_DIR"/conf/core-site.xml \
47 |     -jc gps.examples.dimest.DiameterEstimationVertex###JobConfiguration \
48 |     -mcfg /user/${USER}/gps-machine-config/machine.cfg \
49 |     -log4jconfig "$GPS_DIR"/conf/log4j.config \
50 |     -other -max###30
51 | 
52 | ## finish logging memory + network usage
53 | ../common/bench-finish.sh ${logname}
54 | 
55 | ## get stats (see debug_site.sh for debug naming convention)
56 | hadoop dfs -get /user/${USER}/gps/output/quick-start-machine-stats ./logs/${logfile}
57 | #hadoop dfs -mv /user/${USER}/gps/output/quick-start-machine-stats /user/${USER}/gps/stats-${logname}


--------------------------------------------------------------------------------
/benchmark/gps/disable-dimest-fix.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Disables the fix for diameter estimation.
 4 | #
 5 | # This should be done before running non-diameter estimation algs.
 6 | 
 7 | scriptdir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 8 | source "$scriptdir"/../common/get-dirs.sh
 9 | 
10 | cd "$GPS_DIR"/src/java/gps/messages/storage
11 | cp -f ArrayBackedIncomingMessageStorage.javaORIGINAL ArrayBackedIncomingMessageStorage.java
12 | 
13 | "$scriptdir"/recompile-gps.sh


--------------------------------------------------------------------------------
/benchmark/gps/enable-dimest-fix.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Enables a fix for diameter estimation.
 4 | #
 5 | # This fix should be enabled only for diameter estimation,
 6 | # and should be disabled when running other algorithms.
 7 | 
 8 | scriptdir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 9 | source "$scriptdir"/../common/get-dirs.sh
10 | 
11 | cd "$GPS_DIR"/src/java/gps/messages/storage
12 | cp -f ArrayBackedIncomingMessageStorage.javaDIMEST ArrayBackedIncomingMessageStorage.java
13 | 
14 | "$scriptdir"/recompile-gps.sh


--------------------------------------------------------------------------------
/benchmark/gps/init.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Initiate GPS by creating slaves and machine config files.
 4 | #
 5 | # NOTE: "slaves" is NOT placed in master-script/, because we use
 6 | # our own scripts for starting/stopping GPS workers.
 7 | 
 8 | cd "$(dirname "${BASH_SOURCE[0]}")"
 9 | source ../common/get-hosts.sh
10 | source ../common/get-dirs.sh
11 | source ../common/get-configs.sh
12 | 
13 | rm -f slaves
14 | rm -f machine.cfg
15 | 
16 | # create slaves file
17 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
18 |     for ((j = 1; j <= ${GPS_WPM}; j++)); do
19 |         echo "${CLUSTER_NAME}${i}" >> slaves
20 |     done
21 | done
22 | 
23 | # create machine config file
24 | echo "-1 ${HOSTNAME} 64000" >> machine.cfg   # master is special
25 | 
26 | w_id=0    # worker counter (needed if workers per machine > 1)
27 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
28 |     # to get multiple workers per machine, use the same name
29 |     # but give it a unique id and port
30 |     for ((j = 1; j <= ${GPS_WPM}; j++)); do
31 |         echo "${w_id} ${CLUSTER_NAME}${i} $((64001 + ${w_id}))" >> machine.cfg
32 |         w_id=$((w_id+1))
33 |     done
34 | done
35 | 
36 | # upload machine config file to HDFS
37 | hadoop dfsadmin -safemode wait > /dev/null
38 | hadoop dfs -rmr /user/${USER}/gps-machine-config/ || true
39 | hadoop dfs -mkdir /user/${USER}/gps-machine-config/
40 | hadoop dfs -put machine.cfg /user/${USER}/gps-machine-config/
41 | 
42 | # make GPS log directories if needed
43 | if [[ ! -d "$GPS_LOG_DIR" ]]; then mkdir -p "$GPS_LOG_DIR"; fi
44 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
45 |     ssh ${CLUSTER_NAME}${i} "if [[ ! -d \"$GPS_LOG_DIR\" ]]; then mkdir -p \"$GPS_LOG_DIR\"; fi" &
46 | done
47 | wait


--------------------------------------------------------------------------------
/benchmark/gps/mst.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 2 ]; then
 4 |     echo "usage: $0 input-graph machines"
 5 |     exit -1
 6 | fi
 7 | 
 8 | source ../common/get-dirs.sh
 9 | source ../common/get-configs.sh
10 | 
11 | # place input in /user/${USER}/input/
12 | # output is in /user/${USER}/gps/output/
13 | inputgraph=$(basename $1)
14 | 
15 | # machines should be number of EC2 instances
16 | machines=$2
17 | workers=$(($machines * $GPS_WPM))
18 | 
19 | ## log names
20 | # MST can only run in "normal" mode (LALP & dynamic repartitioning cannot be used)
21 | logname=mst_${inputgraph}_${machines}_0_"$(date +%Y%m%d-%H%M%S)"
22 | logfile=${logname}_time.txt       # GPS statistics (incl running time)
23 | 
24 | 
25 | ## start logging memory + network usage
26 | ../common/bench-init.sh ${logname}
27 | 
28 | ## start algorithm run
29 | # there are 3 versions of MST... according to author, these are:
30 | #
31 | # edgesatrootpjonebyone uses standard Boruvka (no optimizations)
32 | # edgesatselfpjonebyone uses "storing edges at subvertices" (SEAS)
33 | #   -> "edge cleaning on demand" (ECOD) is enabled via flag
34 | # edgeshybridpjonebyone uses SEAS for few iterations then default... but not published
35 | ./start-nodes.sh ${workers} quick-start \
36 |     -ifs /user/${USER}/input/${inputgraph} \
37 |     -hcf "$HADOOP_DIR"/conf/core-site.xml \
38 |     -jc gps.examples.mst.edgesatrootpjonebyone.JobConfiguration \
39 |     -mcfg /user/${USER}/gps-machine-config/machine.cfg \
40 |     -log4jconfig "$GPS_DIR"/conf/log4j.config
41 | 
42 | ## finish logging memory + network usage
43 | ../common/bench-finish.sh ${logname}
44 | 
45 | ## get stats (see debug_site.sh for debug naming convention)
46 | hadoop dfs -get /user/${USER}/gps/output/quick-start-machine-stats ./logs/${logfile}
47 | #hadoop dfs -mv /user/${USER}/gps/output/quick-start-machine-stats /user/${USER}/gps/stats-${logname}


--------------------------------------------------------------------------------
/benchmark/gps/pagerank.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines gps-mode"
 5 |     echo ""
 6 |     echo "gps-mode: 0 for normal (no lalp, no dynamic repartitioning)"
 7 |     echo "          1 for LALP"
 8 |     echo "          2 for dynamic repartitioning"
 9 |     echo "          3 for LALP and dynamic repartitioning"
10 |     exit -1
11 | fi
12 | 
13 | source ../common/get-dirs.sh
14 | source ../common/get-configs.sh
15 | 
16 | # place input in /user/${USER}/input/
17 | # output is in /user/${USER}/gps/output/
18 | inputgraph=$(basename $1)
19 | 
20 | # machines should be number of EC2 instances
21 | machines=$2
22 | workers=$(($machines * $GPS_WPM))
23 | 
24 | mode=$3
25 | case ${mode} in
26 |     0) modeflag="";;
27 |     1) modeflag="-lalp 100";;
28 |     2) modeflag="-dynamic";;
29 |     3) modeflag="-lalp 100 -dynamic";;
30 |     *) echo "Invalid gps-mode"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=pagerank_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # GPS statistics (incl running time)
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | # max controls max number of supersteps; must be 30, to match Giraph
43 | ./start-nodes.sh ${workers} quick-start \
44 |     ${modeflag} \
45 |     -ifs /user/${USER}/input/${inputgraph} \
46 |     -hcf "$HADOOP_DIR"/conf/core-site.xml \
47 |     -jc gps.examples.pagerank.PageRankVertex###JobConfiguration \
48 |     -mcfg /user/${USER}/gps-machine-config/machine.cfg \
49 |     -log4jconfig "$GPS_DIR"/conf/log4j.config \
50 |     -other -max###30
51 | 
52 | ## finish logging memory + network usage
53 | ../common/bench-finish.sh ${logname}
54 | 
55 | ## get stats (see debug_site.sh for debug naming convention)
56 | hadoop dfs -get /user/${USER}/gps/output/quick-start-machine-stats ./logs/${logfile}
57 | #hadoop dfs -mv /user/${USER}/gps/output/quick-start-machine-stats /user/${USER}/gps/stats-${logname}


--------------------------------------------------------------------------------
/benchmark/gps/recompile-gps.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | commondir=$(dirname "${BASH_SOURCE[0]}")/../common
 4 | source "$commondir"/get-hosts.sh
 5 | source "$commondir"/get-dirs.sh
 6 | 
 7 | cd "$GPS_DIR/local-master-scripts/"
 8 | ./make_gps_node_runner_jar.sh
 9 | 
10 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
11 |     scp ../gps_node_runner.jar ${CLUSTER_NAME}${i}:"$GPS_DIR"/gps_node_runner.jar &
12 | done
13 | wait
14 | 
15 | echo "OK."


--------------------------------------------------------------------------------
/benchmark/gps/sssp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 4 ]; then
 4 |     echo "usage: $0 input-graph machines gps-mode source-vertex"
 5 |     echo ""
 6 |     echo "gps-mode: 0 for normal (no lalp, no dynamic repartitioning)"
 7 |     echo "          1 for LALP"
 8 |     echo "          2 for dynamic repartitioning"
 9 |     echo "          3 for LALP and dynamic repartitioning"
10 |     exit -1
11 | fi
12 | 
13 | source ../common/get-dirs.sh
14 | source ../common/get-configs.sh
15 | 
16 | # place input in /user/${USER}/input/
17 | # output is in /user/${USER}/gps/output/
18 | inputgraph=$(basename $1)
19 | 
20 | # machines should be number of EC2 instances
21 | machines=$2
22 | workers=$(($machines * $GPS_WPM))
23 | 
24 | # NOTE: we can only use LALP for SSSP when ALL edge weights are the
25 | # same for the entire graph. In our case, all edge weights are 1.
26 | mode=$3
27 | case ${mode} in
28 |     0) modeflag="";;
29 |     1) modeflag="-lalp 100";;
30 |     2) modeflag="-dynamic";;
31 |     3) modeflag="-lalp 100 -dynamic";;
32 |     *) echo "Invalid gps-mode"; exit -1;;
33 | esac
34 | 
35 | src=$4
36 | 
37 | ## log names
38 | logname=sssp_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
39 | logfile=${logname}_time.txt       # GPS statistics (incl running time)
40 | 
41 | 
42 | ## start logging memory + network usage
43 | ../common/bench-init.sh ${logname}
44 | 
45 | ## start algorithm run
46 | # This SSSP assigns edge weight of 1 to all edges, without using
47 | # the boolean trick of SingleSourceAllVerticesShortestPathVertex.
48 | # Input graph must not have edge weights.
49 | ./start-nodes.sh ${workers} quick-start \
50 |     ${modeflag} \
51 |     -ifs /user/${USER}/input/${inputgraph} \
52 |     -hcf "$HADOOP_DIR"/conf/core-site.xml \
53 |     -jc gps.examples.sssp.SSSPVertex###JobConfiguration \
54 |     -mcfg /user/${USER}/gps-machine-config/machine.cfg \
55 |     -log4jconfig "$GPS_DIR"/conf/log4j.config \
56 |     -other -root###${src}
57 | 
58 | # gps.examples.edgevaluesssp.EdgeValueSSSPVertex###JobConfiguration
59 | # is for when input graph has edge weights.
60 | # input graph must have edge weights, but no vertex values
61 | 
62 | ## finish logging memory + network usage
63 | ../common/bench-finish.sh ${logname}
64 | 
65 | ## get stats (see debug_site.sh for debug naming convention)
66 | hadoop dfs -get /user/${USER}/gps/output/quick-start-machine-stats ./logs/${logfile}
67 | #hadoop dfs -mv /user/${USER}/gps/output/quick-start-machine-stats /user/${USER}/gps/stats-${logname}


--------------------------------------------------------------------------------
/benchmark/gps/start-nodes.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash -e
  2 | 
  3 | # A modified version of master-scripts/start_gps_nodes.sh made friendlier
  4 | # for automation. This incorporates scripts/start_gps_node.sh, so worker
  5 | # machines no longer need to be updated with that script. HDFS output paths,
  6 | # log paths, etc. remain unchanged.
  7 | #
  8 | # Note that each machine can have *multiple* workers. Hence, we refer to
  9 | # physical machines as "machines" and workers as "workers" or "slaves".
 10 | #
 11 | # Workers are started asynchronously, which is faster. This script (i.e.,
 12 | # the master) waits until all workers are done computations before exiting,
 13 | # making it easier to script benchmarks. (Although a sleep delay is still
 14 | # required---see the batch benching script.)
 15 | #
 16 | # Because of how GPS behaves, the # of workers argument is actually IGNORED.
 17 | # Instead, we use # of workers specified in machine slaves/config file.
 18 | # Specifically:
 19 | #
 20 | #  >> If argument < # of actual workers, we start # of actual workers.
 21 | #     (Otherwise, GPS will hang waiting for the extra workers)
 22 | #  >> If argument > # of actual workers, we start # of actual workers.
 23 | #     (Because no ports are specified for extra non-existent workers)
 24 | #
 25 | 
 26 | #
 27 | # To change max JVM heap size for GPS workers, see ../common/get-configs.sh.
 28 | #
 29 | 
 30 | # To use this, pass in arguments like:
 31 | #
 32 | #./start-nodes.sh ${workers} quick-start \
 33 | #    -ifs /user/${USER}/input/${inputgraph} \
 34 | #    -hcf "$HADOOP_DIR"/conf/core-site.xml \
 35 | #    -jc gps.examples.pagerank.PageRankVertex###JobConfiguration \
 36 | #    -mcfg /user/${USER}/gps-machine-config/cs848.cfg \
 37 | #    -log4jconfig "$GPS_DIR"/conf/log4j.config \
 38 | #    -other -max###30
 39 | #
 40 | # Note that GPS's default start script requires 3rd argument
 41 | # and onwards to be double-quoted, i.e.:
 42 | #
 43 | #./master-scripts/start_gps_nodes.sh ${workers} quick-start \
 44 | #    "-ifs /user/${USER}/input/${inputgraph} \
 45 | #     -hcf \"$HADOOP_DIR\"/conf/core-site.xml \
 46 | #     -jc gps.examples.pagerank.PageRankVertex###JobConfiguration \
 47 | #     -mcfg /user/${USER}/gps-machine-config/cs848.cfg \
 48 | #     -log4jconfig \"$GPS_DIR\"/conf/log4j.config \
 49 | #     -other -max###30"
 50 | #
 51 | #
 52 | # To start multiple workers per machine, modify the slaves file to be, e.g.
 53 | #
 54 | # cloud1
 55 | # cloud1
 56 | # cloud2
 57 | # cloud2
 58 | #
 59 | # and similarly for the machine config file.
 60 | #
 61 | #
 62 | # Side note: one way to get automation when using the original gps_start_nodes.sh
 63 | # is by modifying the last slave's start_gps_node.sh to not have the "&". That way,
 64 | # since slaves are started sequentially, the last one will return only when the
 65 | # computation is complete.
 66 | 
 67 | if [ $# -lt 3 ]; then
 68 |     echo "usage: $0 workers mode gps-args"
 69 |     echo ""
 70 |     echo "mode: use 'quick-start' (without quotes)"
 71 |     echo "gps-args: arguments passed to GPS jar, unquoted"
 72 |     exit -1
 73 | fi
 74 | 
 75 | commondir=$(dirname "${BASH_SOURCE[0]}")/../common
 76 | source "$commondir"/get-dirs.sh
 77 | source "$commondir"/get-configs.sh
 78 | 
 79 | 
 80 | OUTPUT_DIR=/user/${USER}/gps/output/
 81 | 
 82 | ## start master
 83 | MASTER_GPS_ID=-1
 84 | GPS_MASTER_XMS=50M     # initial heap size (master)
 85 | 
 86 | echo "Using args: ${@:3}"
 87 | 
 88 | echo "Starting GPS master -1"
 89 | "$JAVA_DIR"/bin/java -Xincgc -Xms${GPS_MASTER_XMS} -Xmx${GPS_MASTER_XMX} -verbose:gc -jar "$GPS_DIR"/gps_node_runner.jar -machineid ${MASTER_GPS_ID} -ofp "$OUTPUT_DIR"/${2}-machine-stats ${@:3} &> "$GPS_LOG_DIR"/${2}-machine${i}-output.txt &
 90 | 
 91 | ## start slaves asynchronously (faster this way)
 92 | GPS_WORKER_XMS=256M   # initial heap size (workers)
 93 | 
 94 | # read-in effectively ensures # of workers never exceeds # of lines in "slaves"
 95 | # the "|| ..." is a workaround in case the file doesn't end with a newline
 96 | w_id=0
 97 | while read slave || [ -n "$slave" ]; do
 98 |     echo "Starting GPS worker ${w_id}"
 99 | 
100 |     # must have -n, otherwise ssh consumes all of stdin (i.e., all of the input file)
101 |     # outer & runs ssh in the background
102 |     # inner & and stdout/err redirections enable ssh connection to end while remote command continues to run
103 |     ssh -n $slave "\"$JAVA_DIR\"/bin/java -Xincgc -Xms${GPS_WORKER_XMS} -Xmx${GPS_WORKER_XMX} -verbose:gc -jar \"$GPS_DIR\"/gps_node_runner.jar -machineid ${w_id} -ofp \"$OUTPUT_DIR\"/${2}-output-${w_id}-of-$((${1}-1)) ${@:3} &> \"$GPS_LOG_DIR\"/${2}-machine${w_id}-output.txt &" &
104 | 
105 |     w_id=$((w_id+1))
106 |     # no need to check if # workers < # slaves... GPS will hang in that situation
107 | done < "$(dirname "${BASH_SOURCE[0]}")"/slaves
108 | 
109 | # ...and wait until computation completes (= master finishes)
110 | wait
111 | echo "Computation complete!"


--------------------------------------------------------------------------------
/benchmark/gps/stop-nodes.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Does the same thing as master-scripts/stop_gps_nodes.sh, but faster.
 4 | # Also removes the need for a separate scripts/stop_nodes.sh.
 5 | 
 6 | kill -9 $(ps aux | grep "[g]ps_node_runner" | awk '{print $2}')
 7 | 
 8 | # the "|| ..." is a workaround in case the file doesn't end with a newline
 9 | while read slave || [ -n "$slave" ]; do
10 |     # must have -n, otherwise ssh consumes all of stdin (i.e., all of the input file)
11 |     ssh -n $slave "kill -9 \$(ps aux | grep \"[g]ps_node_runner\" | awk '{print \$2}')" &
12 | done < "$(dirname "${BASH_SOURCE[0]}")"/slaves
13 | wait


--------------------------------------------------------------------------------
/benchmark/gps/wcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines gps-mode"
 5 |     echo ""
 6 |     echo "gps-mode: 0 for normal (no lalp, no dynamic repartitioning)"
 7 |     echo "          1 for LALP"
 8 |     echo "          2 for dynamic repartitioning"
 9 |     echo "          3 for LALP and dynamic repartitioning"
10 |     exit -1
11 | fi
12 | 
13 | source ../common/get-dirs.sh
14 | source ../common/get-configs.sh
15 | 
16 | # place input in /user/${USER}/input/
17 | # output is in /user/${USER}/gps/output/
18 | inputgraph=$(basename $1)
19 | 
20 | # machines should be number of EC2 instances
21 | machines=$2
22 | workers=$(($machines * $GPS_WPM))
23 | 
24 | mode=$3
25 | case ${mode} in
26 |     0) modeflag="";;
27 |     1) modeflag="-lalp 100";;
28 |     2) modeflag="-dynamic";;
29 |     3) modeflag="-lalp 100 -dynamic";;
30 |     *) echo "Invalid gps-mode"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=wcc_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # GPS statistics (incl running time)
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | # NOTE: numMaxIterations can be set but we don't set it
43 | # (to match Giraph and Mizan, neither of which use SS termination)
44 | ./start-nodes.sh ${workers} quick-start \
45 |     ${modeflag} \
46 |     -ifs /user/${USER}/input/${inputgraph} \
47 |     -hcf "$HADOOP_DIR"/conf/core-site.xml \
48 |     -jc gps.examples.wcc.WeaklyConnectedComponentsVertex###JobConfiguration \
49 |     -mcfg /user/${USER}/gps-machine-config/machine.cfg \
50 |     -log4jconfig "$GPS_DIR"/conf/log4j.config
51 | 
52 | ## finish logging memory + network usage
53 | ../common/bench-finish.sh ${logname}
54 | 
55 | ## get stats (see debug_site.sh for debug naming convention)
56 | hadoop dfs -get /user/${USER}/gps/output/quick-start-machine-stats ./logs/${logfile}
57 | #hadoop dfs -mv /user/${USER}/gps/output/quick-start-machine-stats /user/${USER}/gps/stats-${logname}


--------------------------------------------------------------------------------
/benchmark/graphlab/benchall.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 2 ]; then
 4 |     echo "usage: $0 machines runs"
 5 |     echo ""
 6 |     echo "machines: 4, 8, 16, 32, 64, or 128"
 7 |     exit -1
 8 | fi
 9 | 
10 | cd "$(dirname "${BASH_SOURCE[0]}")"
11 | 
12 | MACHINES=$1
13 | RUNS=$2
14 | 
15 | case ${MACHINES} in
16 |     4)   GRAPHS=(amazon google patents);
17 |          TOL=(0.408805 2.306985 2.220446E-16);   # for PageRank
18 |          SRC=(0 0 6009554);;  # for SSSP
19 |     8)   GRAPHS=(amazon google patents);
20 |          TOL=(0.408805 2.306985 2.220446E-16);
21 |          SRC=(0 0 6009554);;
22 |     16)  GRAPHS=(livejournal orkut arabic twitter);
23 |          TOL=(0.392500 0.011872 75.448252 0.769316);
24 |          SRC=(0 1 3 0);;
25 |     32)  GRAPHS=(livejournal orkut arabic twitter);
26 |          TOL=(0.392500 0.011872 75.448252 0.769316);
27 |          SRC=(0 1 3 0);;
28 |     64)  GRAPHS=(livejournal orkut arabic twitter uk0705);
29 |          TOL=(0.392500 0.011872 75.448252 0.769316 186.053578);
30 |          SRC=(0 1 3 0 0);;
31 |     128) GRAPHS=(livejournal orkut arabic twitter uk0705);
32 |          TOL=(0.392500 0.011872 75.448252 0.769316 186.053578);
33 |          SRC=(0 1 3 0 0);;
34 |     *) echo "Invalid machines"; exit -1;;
35 | esac
36 | 
37 | #################
38 | # Sync run
39 | #################
40 | # we split the algs up for simplicity
41 | for j in "${!GRAPHS[@]}"; do
42 |     for ((i = 1; i <= RUNS; i++)); do
43 |         ./pagerank.sh "${GRAPHS[$j]}-adj-split/" ${MACHINES} 0 ${TOL[$j]}
44 |     done
45 | done
46 |  
47 | for j in "${!GRAPHS[@]}"; do
48 |     for ((i = 1; i <= RUNS; i++)); do
49 |         ./sssp.sh "${GRAPHS[$j]}-adj-split/" ${MACHINES} 0 ${SRC[$j]}
50 |     done
51 | done
52 |  
53 | for graph in "${GRAPHS[@]}"; do
54 |     for ((i = 1; i <= RUNS; i++)); do
55 |         ./wcc.sh "${graph}-adj-split/" ${MACHINES}
56 |     done
57 | done
58 | 
59 | #for graph in "${GRAPHS[@]}"; do
60 | #    for ((i = 1; i <= RUNS; i++)); do
61 | #        ./dimest.sh "${graph}-adj-split/" ${MACHINES}
62 | #    done
63 | #done
64 | 
65 | #################
66 | # Async Run
67 | #################
68 | for j in "${!GRAPHS[@]}"; do
69 |     for ((i = 1; i <= RUNS; i++)); do
70 |         ./pagerank.sh "${GRAPHS[$j]}-adj-split/" ${MACHINES} 1 ${TOL[$j]}
71 |     done
72 | done
73 | 
74 | for j in "${!GRAPHS[@]}"; do
75 |     for ((i = 1; i <= RUNS; i++)); do
76 |         ./sssp.sh "${GRAPHS[$j]}-adj-split/" ${MACHINES} 1 ${SRC[$j]}
77 |     done
78 | done
79 | 
80 | # no WCC
81 | # no dimest


--------------------------------------------------------------------------------
/benchmark/graphlab/dimest.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 2 ]; then
 4 |     echo "usage: $0 input-graph machines"
 5 |     exit -1
 6 | fi
 7 | 
 8 | source ../common/get-dirs.sh
 9 | 
10 | # place input in /user/${USER}/input/
11 | # output is in /user/${USER}/graphlab-output/
12 | inputgraph=$(basename $1)
13 | outputdir=/user/${USER}/graphlab-output/
14 | hadoop dfs -rmr "$outputdir" || true
15 | 
16 | hdfspath=$(grep hdfs "$HADOOP_DIR"/conf/core-site.xml | sed -e 's/.*<value>//' -e 's@</value>.*@@')
17 | 
18 | machines=$2
19 | 
20 | ## log names
21 | # diameter estimation only supports synchronous mode
22 | logname=dimest_${inputgraph}_${machines}_0_"$(date +%Y%m%d-%H%M%S)"
23 | logfile=${logname}_time.txt
24 | 
25 | 
26 | ## start logging memory + network usage
27 | ../common/bench-init.sh ${logname}
28 | 
29 | ## start algorithm run
30 | mpiexec -f ./machines -n ${machines} \
31 |     "$GRAPHLAB_DIR"/release/toolkits/graph_analytics/approximate_diameter \
32 |     --format adjgps \
33 |     --graph_opts ingress=random \
34 |     --graph "$hdfspath"/user/${USER}/input/${inputgraph} 2>&1 | tee -a ./logs/${logfile}
35 | # NOTE: no saveprefix option, diameters/results are outputted to time log
36 | 
37 | ## finish logging memory + network usage
38 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/graphlab/init.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Initiate GraphLab by creating machine file.
 4 | # The contents actually correspond to physical machines.
 5 | 
 6 | cd "$(dirname "${BASH_SOURCE[0]}")"
 7 | source ../common/get-hosts.sh
 8 | 
 9 | # create machines file
10 | rm -f machines
11 | 
12 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
13 |     echo "${CLUSTER_NAME}${i}" >> machines
14 | done


--------------------------------------------------------------------------------
/benchmark/graphlab/pagerank.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 4 ]; then
 4 |     echo "usage: $0 input-graph machines engine-mode tolerance"
 5 |     echo ""
 6 |     echo "engine-mode: 0 for synchronous engine"
 7 |     echo "             1 for asynchronous engine"
 8 |     exit -1
 9 | fi
10 | 
11 | source ../common/get-dirs.sh
12 | 
13 | # place input in /user/${USER}/input/
14 | # output is in /user/${USER}/graphlab-output/
15 | inputgraph=$(basename $1)
16 | outputdir=/user/${USER}/graphlab-output/
17 | hadoop dfs -rmr "$outputdir" || true
18 | 
19 | hdfspath=$(grep hdfs "$HADOOP_DIR"/conf/core-site.xml | sed -e 's/.*<value>//' -e 's@</value>.*@@')
20 | 
21 | machines=$2
22 | 
23 | mode=$3
24 | case ${mode} in
25 |     0) modeflag="sync";;
26 |     1) modeflag="async";;
27 |     *) echo "Invalid engine-mode"; exit -1;;
28 | esac
29 | 
30 | tol=$4
31 | 
32 | ## log names
33 | logname=pagerank_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
34 | logfile=${logname}_time.txt
35 | 
36 | 
37 | ## start logging memory + network usage
38 | ../common/bench-init.sh ${logname}
39 | 
40 | ## start algorithm run
41 | mpiexec -f ./machines -n ${machines} \
42 |     "$GRAPHLAB_DIR"/release/toolkits/graph_analytics/pagerank \
43 |     --tol ${tol} \
44 |     --engine ${modeflag} \
45 |     --format adjgps \
46 |     --graph_opts ingress=random \
47 |     --graph "$hdfspath"/user/${USER}/input/${inputgraph} \
48 |     --saveprefix "$hdfspath"/"$outputdir" 2>&1 | tee -a ./logs/${logfile}
49 | 
50 | ## finish logging memory + network usage
51 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/graphlab/recompile-graphlab.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | commondir=$(dirname "${BASH_SOURCE[0]}")/../common
 4 | source "$commondir"/get-hosts.sh
 5 | source "$commondir"/get-dirs.sh
 6 | 
 7 | # recompile GraphLab
 8 | cd "$GRAPHLAB_DIR"/release/toolkits/graph_analytics/
 9 | make -j $(nproc)
10 | 
11 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
12 |     # NOTE: only copy binaries that will actually be used.. it takes too long otherwise
13 |     scp ./pagerank ${CLUSTER_NAME}${i}:"$GRAPHLAB_DIR"/release/toolkits/graph_analytics/ &
14 |     scp ./sssp ${CLUSTER_NAME}${i}:"$GRAPHLAB_DIR"/release/toolkits/graph_analytics/ &
15 |     scp ./connected_component ${CLUSTER_NAME}$i:"$GRAPHLAB_DIR"/release/toolkits/graph_analytics/ &
16 |     scp ./approximate_diameter ${CLUSTER_NAME}$i:"$GRAPHLAB_DIR"/release/toolkits/graph_analytics/ &
17 | 
18 |     rsync -avz --exclude '*.make' --exclude '*.cmake' "$GRAPHLAB_DIR"/deps/local/ ${CLUSTER_NAME}${i}:"$GRAPHLAB_DIR"/deps/local 
19 | done
20 | wait
21 | 
22 | echo "OK."


--------------------------------------------------------------------------------
/benchmark/graphlab/sssp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 4 ]; then
 4 |     echo "usage: $0 input-graph machines engine-mode source-vertex"
 5 |     echo ""
 6 |     echo "engine-mode: 0 for synchronous engine"
 7 |     echo "             1 for asynchronous engine"
 8 |     exit -1
 9 | fi
10 | 
11 | source ../common/get-dirs.sh
12 | 
13 | # place input in /user/${USER}/input/
14 | # output is in /user/${USER}/graphlab-output/
15 | inputgraph=$(basename $1)
16 | outputdir=/user/${USER}/graphlab-output/
17 | hadoop dfs -rmr "$outputdir" || true
18 | 
19 | hdfspath=$(grep hdfs "$HADOOP_DIR"/conf/core-site.xml | sed -e 's/.*<value>//' -e 's@</value>.*@@')
20 | 
21 | machines=$2
22 | 
23 | mode=$3
24 | case ${mode} in
25 |     0) modeflag="sync";;
26 |     1) modeflag="async";;
27 |     *) echo "Invalid engine-mode"; exit -1;;
28 | esac
29 | 
30 | src=$4
31 | 
32 | ## log names
33 | logname=sssp_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
34 | logfile=${logname}_time.txt
35 | 
36 | 
37 | ## start logging memory + network usage
38 | ../common/bench-init.sh ${logname}
39 | 
40 | ## start algorithm run
41 | mpiexec -f ./machines -n ${machines} \
42 |     "$GRAPHLAB_DIR"/release/toolkits/graph_analytics/sssp \
43 |     --source ${src} \
44 |     --directed 1 \
45 |     --engine ${modeflag} \
46 |     --format adjgps \
47 |     --graph_opts ingress=random \
48 |     --graph "$hdfspath"/user/${USER}/input/${inputgraph} \
49 |     --saveprefix "$hdfspath"/"$outputdir" 2>&1 | tee -a ./logs/${logfile}
50 | 
51 | ## finish logging memory + network usage
52 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/graphlab/wcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 2 ]; then
 4 |     echo "usage: $0 input-graph machines"
 5 |     exit -1
 6 | fi
 7 | 
 8 | source ../common/get-dirs.sh
 9 | 
10 | # place input in /user/${USER}/input/
11 | # output is in /user/${USER}/graphlab-output/
12 | inputgraph=$(basename $1)
13 | outputdir=/user/${USER}/graphlab-output/
14 | hadoop dfs -rmr "$outputdir" || true
15 | 
16 | hdfspath=$(grep hdfs "$HADOOP_DIR"/conf/core-site.xml | sed -e 's/.*<value>//' -e 's@</value>.*@@')
17 | 
18 | machines=$2
19 | 
20 | ## log names
21 | # WCC only supports synchronous mode
22 | logname=wcc_${inputgraph}_${machines}_0_"$(date +%Y%m%d-%H%M%S)"
23 | logfile=${logname}_time.txt
24 | 
25 | 
26 | ## start logging memory + network usage
27 | ../common/bench-init.sh ${logname}
28 | 
29 | ## start algorithm run
30 | mpiexec -f ./machines -n ${machines} \
31 |     "$GRAPHLAB_DIR"/release/toolkits/graph_analytics/connected_component \
32 |     --format adjgps \
33 |     --graph_opts ingress=random \
34 |     --graph "$hdfspath"/user/${USER}/input/${inputgraph} \
35 |     --saveprefix "$hdfspath"/"$outputdir" 2>&1 | tee -a ./logs/${logfile}
36 | 
37 | ## finish logging memory + network usage
38 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/hadoop/init.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash -e
  2 | 
  3 | # Initiate Hadoop by preparing the necessary config files
  4 | # and copying them to all worker machines.
  5 | #
  6 | # To change the max JVM heap size for Hadoop mappers
  7 | # (which will only affect Giraph), see ./get-configs.sh.
  8 | #
  9 | # NOTE: if testing on a single machine (i.e., pseudo-distributed),
 10 | # slaves will have to be edited manually.
 11 | 
 12 | commondir=$(dirname "${BASH_SOURCE[0]}")/../common
 13 | source "$commondir"/get-hosts.sh
 14 | source "$commondir"/get-dirs.sh
 15 | source "$commondir"/get-configs.sh
 16 | 
 17 | cd "$HADOOP_DIR/conf/"
 18 | 
 19 | 
 20 | # masters and slaves
 21 | echo "${HOSTNAME}" > masters
 22 | 
 23 | rm -f slaves
 24 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
 25 |     echo "${CLUSTER_NAME}${i}" >> slaves
 26 | done
 27 | 
 28 | 
 29 | # core-site.xml
 30 | echo "<?xml version=\"1.0\"?>
 31 | <?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>
 32 | 
 33 | <!-- Put site-specific property overrides in this file. -->
 34 | 
 35 | <configuration>
 36 |   <property>
 37 |     <name>hadoop.tmp.dir</name>
 38 |     <value>${HADOOP_DATA_DIR}/hadoop_tmp-\${user.name}</value>
 39 |   </property>
 40 |   <property>
 41 |     <name>fs.default.name</name>
 42 |     <value>hdfs://${HOSTNAME}:54310</value>
 43 |   </property>
 44 |   <property>
 45 |     <name>fs.checkpoint.edits.dir</name>
 46 |     <value>${HADOOP_DATA_DIR}/hadoop_checkpoint-\${user.name}</value>
 47 |   </property>
 48 | </configuration>" > core-site.xml
 49 | 
 50 | 
 51 | # hdfs-site.xml (not really needed, but here it is)
 52 | echo '<?xml version="1.0"?>
 53 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 54 | 
 55 | <!-- Put site-specific property overrides in this file. -->
 56 | <configuration>
 57 |   <property>
 58 |     <name>dfs.replication</name>
 59 |     <value>1</value>
 60 |   </property>
 61 |   <property>
 62 |     <name>dfs.permissions</name>
 63 |     <value>false</value>
 64 |   </property>
 65 | </configuration>' > hdfs-site.xml
 66 | 
 67 | 
 68 | # mapred-site.xml
 69 | echo "<?xml version=\"1.0\"?>
 70 | <?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>
 71 | 
 72 | <!-- Put site-specific property overrides in this file. -->
 73 | 
 74 | <configuration>
 75 |   <property>
 76 |     <name>mapred.job.tracker</name>
 77 |     <value>${HOSTNAME}:54311</value>
 78 |   </property>
 79 |   <property>
 80 |     <name>mapred.local.dir</name>
 81 |     <value>${HADOOP_DATA_DIR}/hadoop_local-\${user.name}</value>
 82 |   </property>
 83 |   <property>
 84 |     <name>mapred.child.tmp</name>
 85 |     <value>${HADOOP_DATA_DIR}/hadoop_child-\${user.name}</value>
 86 |   </property>
 87 |   <property>
 88 |     <name>mapred.job.tracker.persist.jobstatus.dir</name>
 89 |     <value>/home/${USER}/hadoop_jobstatus-\${user.name}</value>
 90 |   </property>
 91 |   <property>
 92 |     <name>mapred.tasktracker.map.tasks.maximum</name>
 93 |     <value>5</value>
 94 |   </property>
 95 |   <property>
 96 |     <name>mapred.tasktracker.reduce.tasks.maximum</name>
 97 |     <value>5</value>
 98 |   </property>
 99 |   <property>
100 |     <name>mapred.map.tasks</name>
101 |     <value>5</value>
102 |   </property>
103 |   <property>
104 |     <name>mapred.reduce.tasks</name>
105 |     <value>10</value>    <!-- determines # of reduce tasks for premizan (modhash + 5a/5b) -->
106 |   </property>
107 |   <property>
108 |     <name>mapreduce.job.counters.max</name>
109 |     <value>1000000</value>
110 |   </property>
111 |   <property>
112 |     <name>mapreduce.job.counters.limit</name>
113 |     <value>1000000</value>
114 |   </property>
115 |   <property>
116 |     <name>mapred.child.java.opts</name>
117 |     <value>-Xmx${GIRAPH_XMX}</value>
118 |   </property>
119 | </configuration>" > mapred-site.xml
120 | 
121 | 
122 | # copy configs to worker machines
123 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
124 |     rsync -avz ./* ${CLUSTER_NAME}${i}:"$HADOOP_DIR"/conf/ &
125 | done
126 | wait


--------------------------------------------------------------------------------
/benchmark/hadoop/restart-hadoop.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Restarts Hadoop and kills any lingering Java processes.
 4 | # This is indiscriminate---it will kill ALL Java processes.
 5 | #
 6 | # NOTE: To programmatically detect when Hadoop is up, use
 7 | # "hadoop dfsadmin -safemode wait" or pass in "1" as arg.
 8 | #
 9 | # usage: ./restart-hadoop.sh [wait?]
10 | #
11 | # wait: 0 for no wait, 1 to wait for Hadoop to start
12 | 
13 | source "$(dirname "${BASH_SOURCE[0]}")"/../common/get-hosts.sh
14 | 
15 | stop-all.sh
16 | 
17 | # do a kill on the master separately---this is useful when testing on a single machine
18 | kill -9 $(pgrep java)
19 | 
20 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
21 |     ssh ${CLUSTER_NAME}${i} "kill -9 \$(pgrep java)" &
22 | done
23 | wait
24 | 
25 | start-all.sh
26 | 
27 | if [[ $# -eq 1 && $1 -eq 1 ]]; then
28 |     # wait until Hadoop is up
29 |     hadoop dfsadmin -safemode wait
30 | fi


--------------------------------------------------------------------------------
/benchmark/init-all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Initialize Hadoop and all systems.
 4 | #
 5 | # NOTE: before doing this, ensure:
 6 | #   1. All machines have correct hostnames, /etc/hostname, and /etc/hosts
 7 | #   2. Master has correct JVM Xmx size set for Giraph and GPS
 8 | #
 9 | # For (1), see ../ec2/uw-ec2.py init
10 | # For (2), see ./common/get-config.sh
11 | #
12 | # To check connectivity, use ./common/ssh-check.sh
13 | 
14 | cd "$(dirname "${BASH_SOURCE[0]}")"
15 | source ./common/get-hosts.sh
16 | source ./common/get-dirs.sh
17 | 
18 | # remove known_hosts (kills stale fingerprints)
19 | echo "Removing known_hosts..."
20 | rm -f ~/.ssh/known_hosts
21 | 
22 | echo "Creating known_hosts..."
23 | for ((i = 0; i <= ${NUM_MACHINES}; i++)); do
24 |     ssh -q -o StrictHostKeyChecking=no ${CLUSTER_NAME}${i} "exit" &
25 | done
26 | wait
27 | 
28 | echo "Updating Hadoop configs..."
29 | ./hadoop/init.sh > /dev/null      # quiet
30 | 
31 | 
32 | ###############
33 | # Hadoop
34 | ###############
35 | # remove old HDFS data (on master and worker machines)
36 | # NOTE: removing HDFS folder will kill targets of symlinks in logs/userlogs/
37 | echo "Removing old HDFS data and Hadoop logs..."
38 | 
39 | stop-all.sh > /dev/null   # just in case anything is running
40 | 
41 | for ((i = 0; i <= ${NUM_MACHINES}; i++)); do
42 |     ssh ${CLUSTER_NAME}${i} "rm -rf \"$HADOOP_DATA_DIR\"; rm -rf \"$HADOOP_DIR\"/logs/*" &
43 | done
44 | wait
45 | 
46 | # create new HDFS & start Hadoop
47 | echo "Creating new HDFS..."
48 | hadoop namenode -format
49 | 
50 | echo "Starting up Hadoop..."
51 | start-all.sh
52 | 
53 | # wait until Hadoop starts up (HDFS exits safemode)
54 | echo "Waiting for Hadoop to start..."
55 | hadoop dfsadmin -safemode wait > /dev/null
56 | 
57 | 
58 | ###############
59 | # Systems
60 | ###############
61 | # nothing to do for Giraph
62 | 
63 | echo "Initializing GPS..."
64 | ./gps/init.sh
65 | 
66 | echo "Initializing GraphLab..."
67 | ./graphlab/init.sh
68 | 
69 | echo "Initializing Mizan..."
70 | ./mizan/init.sh
71 | 
72 | 
73 | ###############
74 | # Datasets
75 | ###############
76 | hadoop dfs -mkdir ./input || true
77 | #echo "Loading datasets..."
78 | #./datasets/load-files.sh


--------------------------------------------------------------------------------
/benchmark/local-init.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash -e
  2 | 
  3 | # Initialize Hadoop and all systems for local testing.
  4 | #
  5 | # This is for LOCAL TESTING only!! Ensure that:
  6 | #   1. LOCAL_MACHINES is the number of pseudo-machines you want.
  7 | #   2. ./common/get-dirs.sh has a correct DIR_PREFIX
  8 | #   3. ./common/get-config.sh has correct JVM Xmx sizes
  9 | 
 10 | # number of pseudo machines to use
 11 | # adjust JVM Xmx accordingly to avoid running out of memory!
 12 | LOCAL_MACHINES=1
 13 | 
 14 | 
 15 | cd "$(dirname "${BASH_SOURCE[0]}")"
 16 | source ./common/get-dirs.sh
 17 | source ./common/get-configs.sh
 18 | 
 19 | echo "Generating get-hosts.sh..."
 20 | echo '#!/bin/bash
 21 | 
 22 | # Set the prefix name and number of slaves/worker machines.
 23 | # NOTE: This file is automatically generated by local-init.sh!
 24 | 
 25 | HOSTNAME=$(hostname)
 26 | CLUSTER_NAME=HOSTNAME
 27 | NUM_MACHINES=0' > ./common/get-hosts.sh
 28 | 
 29 | source ./common/get-hosts.sh
 30 | 
 31 | 
 32 | echo "Updating Hadoop configs..."
 33 | ./hadoop/init.sh > /dev/null      # quiet
 34 | 
 35 | # for local testing, need to create slave manually
 36 | rm -f "$HADOOP_DIR"/conf/slaves
 37 | for ((i = 1; i <= ${LOCAL_MACHINES}; i++)); do
 38 |     echo "localhost" >> "$HADOOP_DIR"/conf/slaves
 39 | done
 40 | 
 41 | ###############
 42 | # Hadoop
 43 | ###############
 44 | # remove old HDFS data (on master and worker machines)
 45 | # NOTE: removing HDFS folder will kill targets of symlinks in logs/userlogs/
 46 | echo "Removing old HDFS data and Hadoop logs..."
 47 | 
 48 | stop-all.sh > /dev/null   # just in case anything is running
 49 | 
 50 | rm -rf "$HADOOP_DATA_DIR"
 51 | rm -rf "$HADOOP_DIR"/logs/*
 52 | 
 53 | # create new HDFS & start Hadoop
 54 | echo "Creating new HDFS..."
 55 | hadoop namenode -format
 56 | 
 57 | echo "Starting up Hadoop..."
 58 | start-all.sh
 59 | 
 60 | # wait until Hadoop starts up (HDFS exits safemode)
 61 | echo "Waiting for Hadoop to start..."
 62 | hadoop dfsadmin -safemode wait > /dev/null
 63 | 
 64 | # NOTE: for some reason HDFS is still not ready after safemode is off,
 65 | # so sleep for 30s to ensure GPS init will succeed
 66 | sleep 30
 67 | 
 68 | ###############
 69 | # Systems
 70 | ###############
 71 | # NOTE: we're duplicating each system's init.sh file...
 72 | # It's a little messy but avoids cluttering up the existing files
 73 | 
 74 | # nothing to do for Giraph
 75 | 
 76 | echo "Initializing GPS..."
 77 | rm -f ./gps/slaves
 78 | rm -f ./gps/machine.cfg
 79 | 
 80 | # create slaves file
 81 | for ((i = 1; i <= ${LOCAL_MACHINES}; i++)); do
 82 |     for ((j = 1; j <= ${GPS_WPM}; j++)); do
 83 |         echo "localhost" >> ./gps/slaves
 84 |     done
 85 | done
 86 | 
 87 | # create machine config file
 88 | echo "-1 ${HOSTNAME} 64000" >> ./gps/machine.cfg
 89 | 
 90 | w_id=0    # worker counter (needed if workers per pseudo-machine > 1)
 91 | for ((i = 1; i <= ${LOCAL_MACHINES}; i++)); do
 92 |     for ((j = 1; j <= ${GPS_WPM}; j++)); do
 93 |         echo "${w_id} localhost $((64001 + ${w_id}))" >> ./gps/machine.cfg
 94 |         w_id=$((w_id+1))
 95 |     done
 96 | done
 97 | 
 98 | hadoop dfs -rmr /user/${USER}/gps-machine-config/ || true
 99 | hadoop dfs -mkdir /user/${USER}/gps-machine-config/
100 | hadoop dfs -put ./gps/machine.cfg /user/${USER}/gps-machine-config/
101 | if [[ ! -d "$GPS_LOG_DIR" ]]; then mkdir -p "$GPS_LOG_DIR"; fi
102 | 
103 | 
104 | echo "Initializing GraphLab..."
105 | rm -f ./graphlab/machines
106 | for ((i = 1; i <= ${LOCAL_MACHINES}; i++)); do
107 |     echo "localhost" >> ./graphlab/machines
108 | done
109 | 
110 | echo "Initializing Mizan..."
111 | rm -f ./mizan/slaves
112 | for ((i = 1; i <= ${LOCAL_MACHINES}; i++)); do
113 |     for ((j = 1; j <= ${MIZAN_WPM}; j++)); do
114 |         echo "localhost" >> ./mizan/slaves
115 |     done
116 | done
117 | 
118 | ###############
119 | # Datasets
120 | ###############
121 | hadoop dfs -mkdir ./input || true
122 | #echo "Loading datasets..."
123 | #./datasets/load-files.sh


--------------------------------------------------------------------------------
/benchmark/mizan/benchall.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 2 ]; then
 4 |     echo "usage: $0 machines runs"
 5 |     echo ""
 6 |     echo "machines: 4, 8, 16, 32, 64, or 128"
 7 |     exit -1
 8 | fi
 9 | 
10 | cd "$(dirname "${BASH_SOURCE[0]}")"
11 | 
12 | MACHINES=$1
13 | RUNS=$2
14 | 
15 | case ${MACHINES} in
16 |     4)   GRAPHS=(amazon google patents);
17 |          SRC=(0 0 6009554);;  # for SSSP
18 |     8)   GRAPHS=(amazon google patents);
19 |          SRC=(0 0 6009554);;
20 |     16)  GRAPHS=(livejournal orkut arabic);
21 |          SRC=(0 1 3);;
22 |     32)  GRAPHS=(livejournal orkut arabic);
23 |          SRC=(0 1 3);;
24 |     64)  GRAPHS=(livejournal orkut arabic);
25 |          SRC=(0 1 3);;
26 |     128) GRAPHS=(livejournal orkut arabic twitter);
27 |          SRC=(0 1 3 0);;
28 |     *) echo "Invalid machines"; exit -1;;
29 | esac
30 | 
31 | 
32 | ##################
33 | # Premizan
34 | ##################
35 | for graph in "${GRAPHS[@]}"; do
36 |     for ((i = 1; i <= RUNS; i++)); do
37 |         ./premizan.sh "${graph}.txt" ${MACHINES} 1
38 |     done
39 | done
40 | 
41 | ##################
42 | # Static run
43 | ##################
44 | # we split the algs up for clarity
45 | for graph in "${GRAPHS[@]}"; do
46 |     for ((i = 1; i <= RUNS; i++)); do
47 |         ./pagerank.sh "${graph}.txt" ${MACHINES} 0
48 |     done
49 | done
50 | 
51 | for j in "${!GRAPHS[@]}"; do
52 |     for ((i = 1; i <= RUNS; i++)); do
53 |         ./sssp.sh "${GRAPHS[$j]}.txt" ${MACHINES} 0 ${SRC[$j]}
54 |     done
55 | done
56 | 
57 | for graph in "${GRAPHS[@]}"; do
58 |     for ((i = 1; i <= RUNS; i++)); do
59 |         ./wcc.sh "${graph}.txt" ${MACHINES} 0
60 |     done
61 | done
62 | 
63 | # MST does not work (issues w/ aggregators + graph mutation in 0.1bu1)
64 | #for graph in "${GRAPHS[@]}"; do
65 | #    for ((i = 1; i <= RUNS; i++)); do
66 | #        ./mst.sh "${graph}-mst.txt" ${MACHINES} 0
67 | #    done
68 | #done
69 | 
70 | #for graph in "${GRAPHS[@]}"; do
71 | #    for ((i = 1; i <= RUNS; i++)); do
72 | #        ./dimest.sh "${graph}.txt" ${MACHINES} 0
73 | #    done
74 | #done
75 | 
76 | ## Other Mizan modes aren't working correctly,
77 | ## so we cannot test them


--------------------------------------------------------------------------------
/benchmark/mizan/dimest.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines migration-mode"
 5 |     echo ""
 6 |     echo "migration-mode: 0 for static (no dynamic migration)"
 7 |     echo "                1 for delayed migration"
 8 |     echo "                2 for mixed migration"
 9 |     exit -1
10 | fi
11 | 
12 | source ../common/get-dirs.sh
13 | source ../common/get-configs.sh
14 | 
15 | # place input into /user/${USER}/input/ (this is where preMizan looks)
16 | # output of preMizan is in /user/${USER}/m_output/mizan_${inputgraph}_mhash_${workers}/
17 | #  (or _mrange_${workers} if using range partitioning)
18 | # output of algorithm is in /user/${USER}/mizan-output/
19 | inputgraph=$(basename $1)
20 | 
21 | # we can have multiple workers per machine
22 | machines=$2
23 | workers=$(($machines * $MIZAN_WPM))
24 | 
25 | mode=$3
26 | case ${mode} in
27 |     0) modeflag="1";;
28 |     1) modeflag="2";;
29 |     2) modeflag="3";;
30 |     *) echo "Invalid migration-mode"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=dimest_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # Mizan stats (incl. running time)
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | mpirun -f slaves -np ${workers} "$MIZAN_DIR"/Release/Mizan-0.1b \
43 |     -a 3 \
44 |     -s 30 \
45 |     -u ${USER} \
46 |     -g ${inputgraph} \
47 |     -w ${workers} \
48 |     -m ${modeflag} 2>&1 | tee -a ./logs/${logfile}
49 | 
50 | ## finish logging memory + network usage
51 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/mizan/init.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Initiate Mizan by creating machine file.
 4 | 
 5 | cd "$(dirname "${BASH_SOURCE[0]}")"
 6 | source ../common/get-hosts.sh
 7 | source ../common/get-configs.sh
 8 | 
 9 | # create slaves file
10 | rm -f slaves
11 | 
12 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
13 |     for ((j = 1; j <= ${MIZAN_WPM}; j++)); do
14 |         echo "${CLUSTER_NAME}${i}" >> slaves
15 |     done
16 | done


--------------------------------------------------------------------------------
/benchmark/mizan/mst.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines migration-mode"
 5 |     echo ""
 6 |     echo "migration-mode: 0 for static (no dynamic migration)"
 7 |     echo "                1 for delayed migration"
 8 |     echo "                2 for mixed migration"
 9 |     exit -1
10 | fi
11 | 
12 | source ../common/get-dirs.sh
13 | source ../common/get-configs.sh
14 | 
15 | # place input into /user/${USER}/input/ (this is where preMizan looks)
16 | # output of preMizan is in /user/${USER}/m_output/mizan_${inputgraph}_mhash_${workers}/
17 | #  (or _mrange_${workers} if using range partitioning)
18 | # output of algorithm is in /user/${USER}/mizan-output/
19 | inputgraph=$(basename $1)
20 | 
21 | # we can have multiple workers per machine
22 | machines=$2
23 | workers=$(($machines * $MIZAN_WPM))
24 | 
25 | mode=$3
26 | case ${mode} in
27 |     0) modeflag="1";;
28 |     1) modeflag="2";;
29 |     2) modeflag="3";;
30 |     *) echo "Invalid migration-mode"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=mst_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # Mizan stats (incl. running time)
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | mpirun -f slaves -np ${workers} "$MIZAN_DIR"/Release/Mizan-0.1b \
43 |     -a 7 \
44 |     -u ${USER} \
45 |     -g ${inputgraph} \
46 |     -w ${workers} \
47 |     -m ${modeflag} 2>&1 | tee -a ./logs/${logfile}
48 | 
49 | ## finish logging memory + network usage
50 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/mizan/pagerank.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines migration-mode"
 5 |     echo ""
 6 |     echo "migration-mode: 0 for static (no dynamic migration)"
 7 |     echo "                1 for delayed migration"
 8 |     echo "                2 for mixed migration"
 9 |     exit -1
10 | fi
11 | 
12 | source ../common/get-dirs.sh
13 | source ../common/get-configs.sh
14 | 
15 | # place input into /user/${USER}/input/ (this is where preMizan looks)
16 | # output of preMizan is in /user/${USER}/m_output/mizan_${inputgraph}_mhash_${workers}/
17 | #  (or _mrange_${workers} if using range partitioning)
18 | # output of algorithm is in /user/${USER}/mizan-output/
19 | inputgraph=$(basename $1)
20 | 
21 | # we can have multiple workers per machine
22 | machines=$2
23 | workers=$(($machines * $MIZAN_WPM))
24 | 
25 | mode=$3
26 | case ${mode} in
27 |     0) modeflag="1";;
28 |     1) modeflag="2";;
29 |     2) modeflag="3";;
30 |     *) echo "Invalid migration-mode"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=pagerank_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # Mizan stats (incl. running time)
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | mpirun -f slaves -np ${workers} "$MIZAN_DIR"/Release/Mizan-0.1b \
43 |     -a 1 \
44 |     -s 30 \
45 |     -u ${USER} \
46 |     -g ${inputgraph} \
47 |     -w ${workers} \
48 |     -m ${modeflag} 2>&1 | tee -a ./logs/${logfile}
49 | 
50 | ## finish logging memory + network usage
51 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/mizan/premizan.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Performs Mizan's prepartitioning phase. This is mandatory as
 4 | # Mizan expects input to be pre-partitioned in a specific way.
 5 | 
 6 | # partition type is either 1 (hash) or 2 (range)
 7 | if [ $# -ne 3 ]; then
 8 |     echo "usage: $0 input-graph machines partition-type"
 9 |     echo ""
10 |     echo "partition-type: 1 for hash partitioning"
11 |     echo "                2 for range partitioning"
12 |     exit -1
13 | fi
14 | 
15 | source ../common/get-dirs.sh
16 | source ../common/get-configs.sh
17 | 
18 | # absolute path to this script's location
19 | scriptdir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
20 | 
21 | 
22 | # place input into /user/${USER}/input/ (this is where preMizan looks)
23 | # output of preMizan is in /user/${USER}/m_output/mizan_${inputgraph}_mhash_${workers}/
24 | #  (or _mrange_${workers} if using range partitioning)
25 | inputgraph=$(basename $1)
26 | 
27 | # we can have multiple workers per machine
28 | machines=$2
29 | workers=$(($machines * $MIZAN_WPM))
30 | 
31 | ## log names
32 | logname=premizan_${inputgraph}_${machines}_${3}_"$(date +%Y%m%d-%H%M%S)"
33 | logfile=${logname}_time.txt
34 | 
35 | ## start logging memory + network usage
36 | ../common/bench-init.sh ${logname}
37 | 
38 | cd "$MIZAN_DIR"/preMizan/hadoopScripts/
39 | 
40 | ## start premizan conversion
41 | tstart="$(date +%s%N)"
42 | 
43 | # taken from preMizan/preMizan.sh
44 | case $3 in
45 |     [1]*) ./hadoop_run_modhash.sh $inputgraph ${workers} true 2>&1 | tee -a "$scriptdir"/logs/${logfile};;
46 |     [2]*) ./hadoop_run_range.sh $inputgraph ${workers} true 2>&1 | tee -a "$scriptdir"/logs/${logfile};;
47 |     *) echo "Error: invalid partition type!";;
48 | esac
49 | 
50 | tdone="$(date +%s%N)"
51 | 
52 | cd "$scriptdir"
53 | 
54 | echo "" | tee -a ./logs/${logfile}
55 | echo "TOTAL TIME (ns): $tdone - $tstart" | tee -a ./logs/${logfile}
56 | echo "TOTAL TIME (sec): $(perl -e "print $(($tdone - $tstart))/1000000000")" | tee -a ./logs/${logfile}
57 | 
58 | ## finish logging memory + network usage
59 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/mizan/recompile-mizan.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | commondir=$(dirname "${BASH_SOURCE[0]}")/../common
 4 | source "$commondir"/get-hosts.sh
 5 | source "$commondir"/get-dirs.sh
 6 | 
 7 | # recompile Mizan
 8 | touch "$MIZAN_DIR"/src/main.cpp
 9 | cd "$MIZAN_DIR/Release"
10 | make all
11 | 
12 | for ((i = 1; i <= ${NUM_MACHINES}; i++)); do
13 |   scp ./Mizan-0.1b ${CLUSTER_NAME}${i}:"$MIZAN_DIR"/Release/ &
14 | done
15 | wait
16 | 
17 | echo "OK."


--------------------------------------------------------------------------------
/benchmark/mizan/sssp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 4 ]; then
 4 |     echo "usage: $0 input-graph machines migration-mode source-vertex"
 5 |     echo ""
 6 |     echo "migration-mode: 0 for static (no dynamic migration)"
 7 |     echo "                1 for delayed migration"
 8 |     echo "                2 for mixed migration"
 9 |     exit -1
10 | fi
11 | 
12 | source ../common/get-dirs.sh
13 | source ../common/get-configs.sh
14 | 
15 | # place input into /user/${USER}/input/ (this is where preMizan looks)
16 | # output of preMizan is in /user/${USER}/m_output/mizan_${inputgraph}_mhash_${workers}/
17 | #  (or _mrange_${workers} if using range partitioning)
18 | # output of algorithm is in /user/${USER}/mizan-output/
19 | inputgraph=$(basename $1)
20 | 
21 | # we can have multiple workers per machine
22 | machines=$2
23 | workers=$(($machines * $MIZAN_WPM))
24 | 
25 | mode=$3
26 | case ${mode} in
27 |     0) modeflag="1";;
28 |     1) modeflag="2";;
29 |     2) modeflag="3";;
30 |     *) echo "Invalid migration-mode"; exit -1;;
31 | esac
32 | 
33 | src=$4
34 | 
35 | ## log names
36 | logname=sssp_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
37 | logfile=${logname}_time.txt       # Mizan stats (incl. running time)
38 | 
39 | 
40 | ## start logging memory + network usage
41 | ../common/bench-init.sh ${logname}
42 | 
43 | ## start algorithm run
44 | mpirun -f slaves -np ${workers} "$MIZAN_DIR"/Release/Mizan-0.1b \
45 |     -a 5 \
46 |     --src ${src} \
47 |     -u ${USER} \
48 |     -g ${inputgraph} \
49 |     -w ${workers} \
50 |     -m ${modeflag} 2>&1 | tee -a ./logs/${logfile}
51 | 
52 | ## finish logging memory + network usage
53 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/mizan/wcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | if [ $# -ne 3 ]; then
 4 |     echo "usage: $0 input-graph machines migration-mode"
 5 |     echo ""
 6 |     echo "migration-mode: 0 for static (no dynamic migration)"
 7 |     echo "                1 for delayed migration"
 8 |     echo "                2 for mixed migration"
 9 |     exit -1
10 | fi
11 | 
12 | source ../common/get-dirs.sh
13 | source ../common/get-configs.sh
14 | 
15 | # place input into /user/${USER}/input/ (this is where preMizan looks)
16 | # output of preMizan is in /user/${USER}/m_output/mizan_${inputgraph}_mhash_${workers}/
17 | #  (or _mrange_${workers} if using range partitioning)
18 | # output of algorithm is in /user/${USER}/mizan-output/
19 | inputgraph=$(basename $1)
20 | 
21 | # we can have multiple workers per machine
22 | machines=$2
23 | workers=$(($machines * $MIZAN_WPM))
24 | 
25 | mode=$3
26 | case ${mode} in
27 |     0) modeflag="1";;
28 |     1) modeflag="2";;
29 |     2) modeflag="3";;
30 |     *) echo "Invalid migration-mode"; exit -1;;
31 | esac
32 | 
33 | ## log names
34 | logname=wcc_${inputgraph}_${machines}_${mode}_"$(date +%Y%m%d-%H%M%S)"
35 | logfile=${logname}_time.txt       # Mizan stats (incl. running time)
36 | 
37 | 
38 | ## start logging memory + network usage
39 | ../common/bench-init.sh ${logname}
40 | 
41 | ## start algorithm run
42 | mpirun -f slaves -np ${workers} "$MIZAN_DIR"/Release/Mizan-0.1b \
43 |     -a 6 \
44 |     -u ${USER} \
45 |     -g ${inputgraph} \
46 |     -w ${workers} \
47 |     -m ${modeflag} 2>&1 | tee -a ./logs/${logfile}
48 | 
49 | ## finish logging memory + network usage
50 | ../common/bench-finish.sh ${logname}


--------------------------------------------------------------------------------
/benchmark/parsers/log-checker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Checks to ensure all log files are present.
 4 | #
 5 | # A simple way to use this is "./log-checker.sh *time.txt".
 6 | #
 7 | # Note that the *_0_mem.txt matching is useful for spotting failed
 8 | # runs too, as bench-init runs before the time log is created.
 9 | 
10 | if [ $# -lt 1 ]; then
11 |     echo "usage: $0 time/mem-log [time/mem-log ...]"
12 |     echo ""
13 |     echo "time/mem-log: experiment's time log file OR master's mem file"
14 |     echo "          (e.g. pagerank_orkut-adj.txt_16_20140101-123050_time.txt)"
15 |     echo "          ( OR  pagerank_orkut-adj.txt_16_20140101-123050_0_mem.txt)"
16 |     exit -1
17 | fi
18 | 
19 | dir=$PWD
20 | 
21 | # read args into array of files
22 | read -a FILES <<< $(echo "$@")
23 | 
24 | for file in "${FILES[@]}"; do
25 |     logname=$(echo $(basename "$file") | sed -e 's/_time.txt$//g' -e 's/_0_mem.txt$//g')
26 | 
27 |     # move to where the logs are
28 |     cd "$dir/$(dirname "$file")"
29 | 
30 |     err="$logname\n"
31 |     iserr=0
32 | 
33 |     # check if all files are present
34 |     if [[ ! -f "${logname}_time.txt" ]]; then
35 |         err="$err  ERROR: ${logname}_time.txt missing!\n"
36 |         iserr=1
37 |     fi
38 | 
39 |     machines=$(echo "$logname" | sed 's/_/ /g' | awk '{print $3}')
40 | 
41 |     for (( i = 0; i <= ${machines}; i++ )); do
42 |         if [[ ! -f "${logname}_${i}_mem.txt" ]]; then
43 |             err="$err  ERROR: ${logname}_${i}_mem.txt missing!\n"
44 |             iserr=1
45 |         elif [[ ! -f "${logname}_${i}_nbt.txt" ]]; then
46 |             err="$err  ERROR: ${logname}_${i}_nbt.txt missing!\n"
47 |             iserr=1
48 |         elif [[ ! -f "${logname}_${i}_cpu.txt" ]]; then
49 |             err="$err  WARNING: ${logname}_${i}_cpu.txt missing!\n"
50 |             iserr=1
51 |         elif [[ ! -f "${logname}_${i}_net.txt" ]]; then
52 |             err="$err  WARNING: ${logname}_${i}_net.txt missing!\n"
53 |             iserr=1
54 |         fi
55 |     done
56 | 
57 |     # only print something when there's an error
58 |     if [[ $iserr -eq 1 ]]; then
59 |         echo -e "$err"
60 |     fi
61 | done


--------------------------------------------------------------------------------
/benchmark/readme.txt:
--------------------------------------------------------------------------------
 1 | =====================================================================
 2 | Please see the wiki at http://github.com/xvz/graph-processing/wiki/
 3 | =====================================================================
 4 | 
 5 | Scripts specific to each system and/or Hadoop are located in their respective folders. Scripts common across multiple systems (e.g., pre- and post-benchmarking setup/cleanup scripts) are in "common".
 6 | 
 7 | All results are stored in ./<system>/logs/, where system is giraph, gps, graphlab, or mizan.
 8 | 
 9 | WARNING: Everything has only been tested in bash! Things may or may not break if you use a different shell.
10 | 
11 | NOTE: Benching scripts MUST be run from their folders (i.e., $PWD = location of script)---otherwise they won't work. Other scripts can be ran from anywhere.
12 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/findbugs-exclude.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | 
 3 | <!--
 4 |    Licensed to the Apache Software Foundation (ASF) under one or more
 5 |   contributor license agreements.  See the NOTICE file distributed with
 6 |   this work for additional information regarding copyright ownership.
 7 |   The ASF licenses this file to You under the Apache License, Version 2.0
 8 |   (the "License"); you may not use this file except in compliance with
 9 |   the License.  You may obtain a copy of the License at
10 | 
11 |        http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 |    Unless required by applicable law or agreed to in writing, software
14 |    distributed under the License is distributed on an "AS IS" BASIS,
15 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |    See the License for the specific language governing permissions and
17 |    limitations under the License.
18 | -->
19 | 
20 | <FindBugsFilter>
21 |   <Match>
22 |     <Bug pattern="DM_DEFAULT_ENCODING"/>
23 |   </Match>
24 |   <Match>
25 |     <Bug pattern="DM_EXIT"/>
26 |   </Match>
27 |   <Match>
28 |     <Bug pattern="EI_EXPOSE_REP"/>
29 |   </Match>
30 |   <Match>
31 |     <Bug pattern="EI_EXPOSE_REP2"/>
32 |   </Match>
33 |   <Match>
34 |     <Bug pattern="EQ_COMPARETO_USE_OBJECT_EQUALS"/>
35 |   </Match>
36 |   <Match>
37 |     <Bug pattern="JLM_JSR166_UTILCONCURRENT_MONITORENTER"/>
38 |   </Match>
39 |   <Match>
40 |     <Bug pattern="NS_DANGEROUS_NON_SHORT_CIRCUIT"/>
41 |   </Match>
42 |   <Match>
43 |     <Bug pattern="RV_ABSOLUTE_VALUE_OF_HASHCODE"/>
44 |   </Match>
45 |   <Match>
46 |     <Bug pattern="RV_ABSOLUTE_VALUE_OF_RANDOM_INT"/>
47 |   </Match>
48 |   <Match>
49 |     <Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE"/>
50 |   </Match>
51 |   <Match>
52 |     <Bug pattern="SE_COMPARATOR_SHOULD_BE_SERIALIZABLE"/>
53 |   </Match>
54 |   <Match>
55 |     <Bug pattern="ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD"/>
56 |   </Match>
57 |   <Match>
58 |     <Bug pattern="UUF_UNUSED_FIELD"/>
59 |   </Match>
60 |   <Match>
61 |     <Bug pattern="UWF_UNWRITTEN_PUBLIC_OR_PROTECTED_FIELD"/>
62 |   </Match>
63 |   <Match>
64 |     <Class name="org.apache.giraph.examples.MinimumSpanningTreeVertex"/>
65 |     <Method name="compute"/>
66 |     <Bug pattern="SF_SWITCH_FALLTHROUGH"/>
67 |   </Match>
68 | </FindBugsFilter>
69 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-core/src/main/java/org/apache/giraph/io/formats/JsonLongLongLongLongVertexInputFormat.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | package org.apache.giraph.io.formats;
 19 | 
 20 | import com.google.common.collect.Lists;
 21 | import org.apache.giraph.edge.Edge;
 22 | import org.apache.giraph.edge.EdgeFactory;
 23 | import org.apache.giraph.graph.Vertex;
 24 | import org.apache.hadoop.io.LongWritable;
 25 | import org.apache.hadoop.io.Text;
 26 | import org.apache.hadoop.mapreduce.InputSplit;
 27 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 28 | import org.json.JSONArray;
 29 | import org.json.JSONException;
 30 | 
 31 | import java.io.IOException;
 32 | import java.util.List;
 33 | 
 34 | /**
 35 |   * VertexInputFormat that features <code>long</code> vertex ID's,
 36 |   * <code>long</code> vertex values and <code>long</code>
 37 |   * out-edge weights, and <code>long</code> message types,
 38 |   *  specified in JSON format.
 39 |   */
 40 | public class JsonLongLongLongLongVertexInputFormat extends
 41 |   TextVertexInputFormat<LongWritable, LongWritable, LongWritable> {
 42 | 
 43 |   @Override
 44 |   public TextVertexReader createVertexReader(InputSplit split,
 45 |       TaskAttemptContext context) {
 46 |     return new JsonLongLongLongLongVertexReader();
 47 |   }
 48 | 
 49 |  /**
 50 |   * VertexReader that features <code>long</code> vertex
 51 |   * values and <code>long</code> out-edge weights. The
 52 |   * files should be in the following JSON format:
 53 |   * JSONArray(<vertex id>, <vertex value>,
 54 |   *   JSONArray(JSONArray(<dest vertex id>, <edge value>), ...))
 55 |   * Here is an example with vertex id 1, vertex value 4, and two edges.
 56 |   * First edge has a destination vertex 2, edge value 2.
 57 |   * Second edge has a destination vertex 3, edge value 1.
 58 |   * [1,4,[[2,2],[3,1]]]
 59 |   */
 60 |   class JsonLongLongLongLongVertexReader extends
 61 |     TextVertexReaderFromEachLineProcessedHandlingExceptions<JSONArray,
 62 |     JSONException> {
 63 | 
 64 |     @Override
 65 |     protected JSONArray preprocessLine(Text line) throws JSONException {
 66 |       return new JSONArray(line.toString());
 67 |     }
 68 | 
 69 |     @Override
 70 |     protected LongWritable getId(JSONArray jsonVertex) throws JSONException,
 71 |               IOException {
 72 |       return new LongWritable(jsonVertex.getLong(0));
 73 |     }
 74 | 
 75 |     @Override
 76 |     protected LongWritable getValue(JSONArray jsonVertex) throws
 77 |       JSONException, IOException {
 78 |       return new LongWritable(jsonVertex.getLong(1));
 79 |     }
 80 | 
 81 |     @Override
 82 |     protected Iterable<Edge<LongWritable, LongWritable>> getEdges(
 83 |         JSONArray jsonVertex) throws JSONException, IOException {
 84 |       JSONArray jsonEdgeArray = jsonVertex.getJSONArray(2);
 85 |       List<Edge<LongWritable, LongWritable>> edges =
 86 |           Lists.newArrayListWithCapacity(jsonEdgeArray.length());
 87 |       for (int i = 0; i < jsonEdgeArray.length(); ++i) {
 88 |         JSONArray jsonEdge = jsonEdgeArray.getJSONArray(i);
 89 |         edges.add(EdgeFactory.create(new LongWritable(jsonEdge.getLong(0)),
 90 |             new LongWritable(jsonEdge.getLong(1))));
 91 |       }
 92 |       return edges;
 93 |     }
 94 | 
 95 |     @Override
 96 |     protected Vertex<LongWritable, LongWritable, LongWritable,
 97 |               LongWritable> handleException(Text line, JSONArray jsonVertex,
 98 |                   JSONException e) {
 99 |       throw new IllegalArgumentException(
100 |           "Couldn't get vertex from line " + line, e);
101 |     }
102 | 
103 |   }
104 | }
105 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-core/src/main/java/org/apache/giraph/io/formats/JsonLongLongNullLongVertexInputFormat.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | package org.apache.giraph.io.formats;
 19 | 
 20 | import com.google.common.collect.Lists;
 21 | import org.apache.giraph.edge.Edge;
 22 | import org.apache.giraph.edge.EdgeFactory;
 23 | import org.apache.giraph.graph.Vertex;
 24 | import org.apache.hadoop.io.LongWritable;
 25 | import org.apache.hadoop.io.NullWritable;
 26 | import org.apache.hadoop.io.Text;
 27 | import org.apache.hadoop.mapreduce.InputSplit;
 28 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 29 | import org.json.JSONArray;
 30 | import org.json.JSONException;
 31 | 
 32 | import java.io.IOException;
 33 | import java.util.List;
 34 | 
 35 | /**
 36 |   * VertexInputFormat that features <code>long</code> vertex ID's,
 37 |   * <code>long</code> vertex values and <code>null</code>
 38 |   * out-edge weights, and <code>long</code> message types,
 39 |   *  specified in JSON format.
 40 |   */
 41 | public class JsonLongLongNullLongVertexInputFormat extends
 42 |   TextVertexInputFormat<LongWritable, LongWritable, NullWritable> {
 43 | 
 44 |   @Override
 45 |   public TextVertexReader createVertexReader(InputSplit split,
 46 |       TaskAttemptContext context) {
 47 |     return new JsonLongLongNullLongVertexReader();
 48 |   }
 49 | 
 50 |  /**
 51 |   * VertexReader that features <code>long</code> vertex
 52 |   * values and <code>null</code> out-edge weights. The
 53 |   * files should be in the following JSON format:
 54 |   * JSONArray(<vertex id>, <vertex value>,
 55 |   *   JSONArray(JSONArray(<dest vertex id>, <edge value>), ...))
 56 |   * Here is an example with vertex id 1, vertex value 4, and two edges.
 57 |   * First edge has a destination vertex 2, edge value 2.
 58 |   * Second edge has a destination vertex 3, edge value 1.
 59 |   * [1,4,[[2,2],[3,1]]]
 60 |   */
 61 |   class JsonLongLongNullLongVertexReader extends
 62 |     TextVertexReaderFromEachLineProcessedHandlingExceptions<JSONArray,
 63 |     JSONException> {
 64 | 
 65 |     @Override
 66 |     protected JSONArray preprocessLine(Text line) throws JSONException {
 67 |       return new JSONArray(line.toString());
 68 |     }
 69 | 
 70 |     @Override
 71 |     protected LongWritable getId(JSONArray jsonVertex) throws JSONException,
 72 |               IOException {
 73 |       return new LongWritable(jsonVertex.getLong(0));
 74 |     }
 75 | 
 76 |     @Override
 77 |     protected LongWritable getValue(JSONArray jsonVertex) throws
 78 |       JSONException, IOException {
 79 |       return new LongWritable(jsonVertex.getLong(1));
 80 |     }
 81 | 
 82 |     @Override
 83 |     protected Iterable<Edge<LongWritable, NullWritable>> getEdges(
 84 |         JSONArray jsonVertex) throws JSONException, IOException {
 85 |       JSONArray jsonEdgeArray = jsonVertex.getJSONArray(2);
 86 |       List<Edge<LongWritable, NullWritable>> edges =
 87 |           Lists.newArrayListWithCapacity(jsonEdgeArray.length());
 88 |       for (int i = 0; i < jsonEdgeArray.length(); ++i) {
 89 |         JSONArray jsonEdge = jsonEdgeArray.getJSONArray(i);
 90 |         edges.add(EdgeFactory.create(new LongWritable(jsonEdge.getLong(0)),
 91 |                                      NullWritable.get()));
 92 |       }
 93 |       return edges;
 94 |     }
 95 | 
 96 |     @Override
 97 |     protected Vertex<LongWritable, LongWritable, NullWritable,
 98 |               LongWritable> handleException(Text line, JSONArray jsonVertex,
 99 |                   JSONException e) {
100 |       throw new IllegalArgumentException(
101 |           "Couldn't get vertex from line " + line, e);
102 |     }
103 | 
104 |   }
105 | }
106 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/ConnectedComponentsInputFormat.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *     http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing, software
13 |  * distributed under the License is distributed on an "AS IS" BASIS,
14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |  * See the License for the specific language governing permissions and
16 |  * limitations under the License.
17 |  */
18 | 
19 | package org.apache.giraph.examples;
20 | 
21 | import com.google.common.collect.Lists;
22 | import org.apache.giraph.edge.Edge;
23 | import org.apache.giraph.edge.EdgeFactory;
24 | import org.apache.hadoop.io.LongWritable;
25 | import org.apache.hadoop.io.NullWritable;
26 | import org.apache.hadoop.io.Text;
27 | import org.apache.giraph.io.formats.TextVertexInputFormat;
28 | import org.apache.hadoop.mapreduce.InputSplit;
29 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
30 | 
31 | import java.io.IOException;
32 | import java.util.List;
33 | import java.util.regex.Pattern;
34 | 
35 | /**
36 |  * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for
37 |  * {@link org.apache.giraph.examples.ConnectedComponentsVertex}.
38 |  *
39 |  * Inputs have long ids, no edge weights, and no vertex values.
40 |  * (Vertex values are set to a long of 0.)
41 |  *
42 |  * Each line consists of:
43 |  * vertex neighbor1 neighbor2 ...
44 |  *
45 |  * Values can be separated by spaces or tabs.
46 |  */
47 | public class ConnectedComponentsInputFormat extends
48 |     TextVertexInputFormat<LongWritable, LongWritable, NullWritable> {
49 |   /** Separator of the vertex and neighbors */
50 |   private static final Pattern SEPARATOR = Pattern.compile("[\t ]");
51 | 
52 |   @Override
53 |   public TextVertexReader createVertexReader(InputSplit split,
54 |       TaskAttemptContext context)
55 |     throws IOException {
56 |     return new ConnectedComponentsVertexReader();
57 |   }
58 | 
59 |   /**
60 |    * Vertex reader associated with {@link ConnectedComponentsInputFormat}.
61 |    */
62 |   public class ConnectedComponentsVertexReader extends
63 |     TextVertexReaderFromEachLineProcessed<String[]> {
64 |     /**
65 |      * Cached vertex id for the current line
66 |      */
67 |     private LongWritable id;
68 | 
69 |     @Override
70 |     protected String[] preprocessLine(Text line) throws IOException {
71 |       String[] tokens = SEPARATOR.split(line.toString());
72 |       id = new LongWritable(Long.parseLong(tokens[0]));
73 |       return tokens;
74 |     }
75 | 
76 |     @Override
77 |     protected LongWritable getId(String[] tokens) throws IOException {
78 |       return id;
79 |     }
80 | 
81 |     @Override
82 |     protected LongWritable getValue(String[] tokens) throws IOException {
83 |       return new LongWritable(0);
84 |     }
85 | 
86 |     @Override
87 |     protected Iterable<Edge<LongWritable, NullWritable>> getEdges(
88 |         String[] tokens) throws IOException {
89 |       List<Edge<LongWritable, NullWritable>> edges =
90 |           Lists.newArrayListWithCapacity(tokens.length - 1);
91 |       for (int i = 1; i < tokens.length; i++) {
92 |         edges.add(EdgeFactory.create(
93 |             new LongWritable(Long.parseLong(tokens[i]))));
94 |       }
95 |       return edges;
96 |     }
97 |   }
98 | }
99 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/ConnectedComponentsVertex.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *     http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing, software
13 |  * distributed under the License is distributed on an "AS IS" BASIS,
14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |  * See the License for the specific language governing permissions and
16 |  * limitations under the License.
17 |  */
18 | 
19 | package org.apache.giraph.examples;
20 | 
21 | // NOTE: original code used this.. have to comment it out
22 | // b/c of checkstyle errors
23 | //import org.apache.giraph.edge.Edge;
24 | import org.apache.giraph.graph.Vertex;
25 | import org.apache.hadoop.io.LongWritable;
26 | import org.apache.hadoop.io.NullWritable;
27 | 
28 | import java.io.IOException;
29 | 
30 | /**
31 |  * Implementation of the HCC algorithm that identifies connected components and
32 |  * assigns each vertex its "component identifier" (the smallest vertex id
33 |  * in the component)
34 |  *
35 |  * The idea behind the algorithm is very simple: propagate the smallest
36 |  * vertex id along the edges to all vertices of a connected component. The
37 |  * number of supersteps necessary is equal to the length of the maximum
38 |  * diameter of all components + 1
39 |  *
40 |  * The original Hadoop-based variant of this algorithm was proposed by Kang,
41 |  * Charalampos, Tsourakakis and Faloutsos in
42 |  * "PEGASUS: Mining Peta-Scale Graphs", 2010
43 |  *
44 |  * http://www.cs.cmu.edu/~ukang/papers/PegasusKAIS.pdf
45 |  */
46 | @Algorithm(
47 |     name = "Connected components",
48 |     description = "Finds connected components of the graph"
49 | )
50 | public class ConnectedComponentsVertex extends Vertex<LongWritable,
51 |     LongWritable, NullWritable, LongWritable> {
52 |   /**
53 |    * Propagates the smallest vertex id to all neighbors. Will always choose to
54 |    * halt and only reactivate if a smaller id has been sent to it.
55 |    *
56 |    * @param messages Iterator of messages from the previous superstep.
57 |    * @throws IOException
58 |    */
59 |   @Override
60 |   public void compute(Iterable<LongWritable> messages) throws IOException {
61 |     long currentComponent = getValue().get();
62 | 
63 |     // in first superstep, load proper vertex values and then broadcast
64 |     if (getSuperstep() == 0) {
65 |       currentComponent = getId().get();
66 |       setValue(new LongWritable(currentComponent));
67 | 
68 |       // indiscriminately send messages to all neighbours,
69 |       // as this mirrors GPS and Mizan implementations
70 |       sendMessageToAllEdges(getValue());
71 | 
72 |       voteToHalt();
73 |       return;
74 |     }
75 | 
76 |     boolean changed = false;
77 |     // did we get a smaller id ?
78 |     for (LongWritable message : messages) {
79 |       long candidateComponent = message.get();
80 |       if (candidateComponent < currentComponent) {
81 |         currentComponent = candidateComponent;
82 |         changed = true;
83 |       }
84 |     }
85 | 
86 |     // propagate new component id to the neighbors
87 |     if (changed) {
88 |       setValue(new LongWritable(currentComponent));
89 |       sendMessageToAllEdges(getValue());
90 |     }
91 | 
92 |     voteToHalt();
93 |   }
94 | }
95 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/DiameterEstimationInputFormat.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | 
 19 | package org.apache.giraph.examples;
 20 | 
 21 | import com.google.common.collect.Lists;
 22 | import org.apache.giraph.edge.Edge;
 23 | import org.apache.giraph.edge.EdgeFactory;
 24 | import org.apache.hadoop.io.LongWritable;
 25 | import org.apache.giraph.examples.DiameterEstimationVertex.LongArrayWritable;
 26 | import org.apache.hadoop.io.NullWritable;
 27 | import org.apache.hadoop.io.Text;
 28 | import org.apache.giraph.io.formats.TextVertexInputFormat;
 29 | import org.apache.hadoop.mapreduce.InputSplit;
 30 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 31 | 
 32 | import java.io.IOException;
 33 | import java.util.List;
 34 | import java.util.regex.Pattern;
 35 | 
 36 | /**
 37 |  * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for
 38 |  * {@link org.apache.giraph.examples.DiameterEstimationVertex}.
 39 |  *
 40 |  * Inputs have long ids, no edge weights, and no vertex values.
 41 |  *
 42 |  * Each line consists of:
 43 |  * vertex neighbor1 neighbor2 ...
 44 |  *
 45 |  * Values can be separated by spaces or tabs.
 46 |  */
 47 | public class DiameterEstimationInputFormat extends
 48 |     TextVertexInputFormat<LongWritable, LongArrayWritable, NullWritable> {
 49 |   /** Separator of the vertex and neighbors */
 50 |   private static final Pattern SEPARATOR = Pattern.compile("[\t ]");
 51 | 
 52 |   @Override
 53 |   public TextVertexReader createVertexReader(InputSplit split,
 54 |       TaskAttemptContext context)
 55 |     throws IOException {
 56 |     return new DiameterEstimationVertexReader();
 57 |   }
 58 | 
 59 |   /**
 60 |    * Vertex reader associated with {@link DiameterEstimationInputFormat}.
 61 |    */
 62 |   public class DiameterEstimationVertexReader extends
 63 |     TextVertexReaderFromEachLineProcessed<String[]> {
 64 |     /**
 65 |      * Cached vertex id for the current line
 66 |      */
 67 |     private LongWritable id;
 68 | 
 69 |     @Override
 70 |     protected String[] preprocessLine(Text line) throws IOException {
 71 |       String[] tokens = SEPARATOR.split(line.toString());
 72 |       id = new LongWritable(Long.parseLong(tokens[0]));
 73 |       return tokens;
 74 |     }
 75 | 
 76 |     @Override
 77 |     protected LongWritable getId(String[] tokens) throws IOException {
 78 |       return id;
 79 |     }
 80 | 
 81 |     @Override
 82 |     protected LongArrayWritable getValue(String[] tokens) throws IOException {
 83 |       // ignore tokens and return dummy LongArrayWritable
 84 |       // (this will be replaced during computation)
 85 |       return new LongArrayWritable();
 86 |     }
 87 | 
 88 |     @Override
 89 |     protected Iterable<Edge<LongWritable, NullWritable>> getEdges(
 90 |         String[] tokens) throws IOException {
 91 |       List<Edge<LongWritable, NullWritable>> edges =
 92 |           Lists.newArrayListWithCapacity(tokens.length - 1);
 93 |       for (int i = 1; i < tokens.length; i++) {
 94 |         edges.add(EdgeFactory.create(
 95 |               new LongWritable(Long.parseLong(tokens[i]))));
 96 |       }
 97 |       return edges;
 98 |     }
 99 |   }
100 | }
101 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/JsonLongLongArrayInputFormat.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | package org.apache.giraph.examples;
 19 | 
 20 | import com.google.common.collect.Lists;
 21 | import org.apache.giraph.edge.Edge;
 22 | import org.apache.giraph.edge.EdgeFactory;
 23 | import org.apache.giraph.graph.Vertex;
 24 | import org.apache.giraph.io.formats.TextVertexInputFormat;
 25 | import org.apache.hadoop.io.LongWritable;
 26 | import org.apache.hadoop.io.NullWritable;
 27 | import org.apache.hadoop.io.Text;
 28 | import org.apache.giraph.examples.DiameterEstimationVertex.LongArrayWritable;
 29 | import org.apache.hadoop.mapreduce.InputSplit;
 30 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 31 | import org.json.JSONArray;
 32 | import org.json.JSONException;
 33 | 
 34 | import java.io.IOException;
 35 | import java.util.List;
 36 | 
 37 | /**
 38 |  * ***DEPRECATED***
 39 |  * We no longer use Json format for input. Instead, we use simple
 40 |  * text input format. See the new DiameterEstimationInputFormat.
 41 |  * ***DEPRECATED***
 42 |  *
 43 |  * VertexInputFormat that reads in <code>long</code> vertex IDs,
 44 |  * <code>double</code> vertex values and <code>float</code>
 45 |  * out-edge weights, and <code>double</code> message types,
 46 |  * specified in JSON format. Output graph has <code>long</code>
 47 |  * vertex IDs, but dimest-specific vertex value, out-edge weight
 48 |  * and message types.
 49 |  */
 50 | public class JsonLongLongArrayInputFormat extends
 51 |   TextVertexInputFormat<LongWritable, LongArrayWritable, NullWritable> {
 52 | 
 53 |   @Override
 54 |   public TextVertexReader createVertexReader(InputSplit split,
 55 |       TaskAttemptContext context) {
 56 |     return new JsonLongLongArrayReader();
 57 |   }
 58 | 
 59 |  /**
 60 |   * VertexReader that features <code>LongArrayWritable</code> vertex
 61 |   * values and <code>NullWritable</code> out-edge weights. The
 62 |   * files should be in the following JSON format:
 63 |   * JSONArray(<vertex id>, <vertex value>,
 64 |   *   JSONArray(JSONArray(<dest vertex id>, <edge value>), ...))
 65 |   * Here is an example with vertex id 1, vertex value 4.3, and two edges.
 66 |   * First edge has a destination vertex 2, edge value 2.1.
 67 |   * Second edge has a destination vertex 3, edge value 0.7.
 68 |   * [1,4.3,[[2,2.1],[3,0.7]]]
 69 |   *
 70 |   * Vertex value and edge weights must be present but are ignored.
 71 |   */
 72 |   class JsonLongLongArrayReader extends
 73 |     TextVertexReaderFromEachLineProcessedHandlingExceptions<JSONArray,
 74 |     JSONException> {
 75 | 
 76 |     @Override
 77 |     protected JSONArray preprocessLine(Text line) throws JSONException {
 78 |       return new JSONArray(line.toString());
 79 |     }
 80 | 
 81 |     @Override
 82 |     protected LongWritable getId(JSONArray jsonVertex) throws JSONException,
 83 |               IOException {
 84 |       return new LongWritable(jsonVertex.getLong(0));
 85 |     }
 86 | 
 87 |     @Override
 88 |     protected LongArrayWritable getValue(JSONArray jsonVertex) throws
 89 |       JSONException, IOException {
 90 |       // ignore whatever is in jsonVertex, and return dummy LongArrayWritable
 91 |       // instead (this will be replaced during computation)
 92 |       return new LongArrayWritable();
 93 |     }
 94 | 
 95 |     @Override
 96 |     protected Iterable<Edge<LongWritable, NullWritable>> getEdges(
 97 |         JSONArray jsonVertex) throws JSONException, IOException {
 98 | 
 99 |       JSONArray jsonEdgeArray = jsonVertex.getJSONArray(2);
100 |       List<Edge<LongWritable, NullWritable>> edges =
101 |           Lists.newArrayListWithCapacity(jsonEdgeArray.length());
102 | 
103 |       long dst;
104 | 
105 |       for (int i = 0; i < jsonEdgeArray.length(); ++i) {
106 |         JSONArray jsonEdge = jsonEdgeArray.getJSONArray(i);
107 |         dst = jsonEdge.getLong(0);
108 |         edges.add(EdgeFactory.create(new LongWritable(dst),
109 |                                      NullWritable.get()));
110 |       }
111 |       return edges;
112 |     }
113 | 
114 |     @Override
115 |     protected Vertex<LongWritable, LongArrayWritable,
116 |                      NullWritable, LongArrayWritable>
117 |     handleException(Text line, JSONArray jsonVertex, JSONException e) {
118 |       throw new IllegalArgumentException(
119 |           "Couldn't get vertex from line " + line, e);
120 |     }
121 | 
122 |   }
123 | }
124 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/JsonLongMSTVertexInputFormat.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | package org.apache.giraph.examples;
 19 | 
 20 | import com.google.common.collect.Lists;
 21 | import org.apache.giraph.edge.Edge;
 22 | import org.apache.giraph.edge.EdgeFactory;
 23 | import org.apache.giraph.graph.Vertex;
 24 | import org.apache.giraph.io.formats.TextVertexInputFormat;
 25 | import org.apache.hadoop.io.LongWritable;
 26 | import org.apache.hadoop.io.Text;
 27 | import org.apache.giraph.examples.MinimumSpanningTreeVertex.MSTVertexValue;
 28 | import org.apache.giraph.examples.MinimumSpanningTreeVertex.MSTEdgeValue;
 29 | import org.apache.giraph.examples.MinimumSpanningTreeVertex.MSTMessage;
 30 | import org.apache.hadoop.mapreduce.InputSplit;
 31 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 32 | import org.json.JSONArray;
 33 | import org.json.JSONException;
 34 | 
 35 | import java.io.IOException;
 36 | import java.util.List;
 37 | 
 38 | /**
 39 |  * ***DEPRECATED***
 40 |  * We no longer use Json format for input. Instead, we use simple
 41 |  * text input format. See the new MinimumSpanningTreeInputFormat.
 42 |  * ***DEPRECATED***
 43 |  *
 44 |  * VertexInputFormat that reads in <code>long</code> vertex IDs,
 45 |  * <code>double</code> vertex values and <code>float</code>
 46 |  * out-edge weights, and <code>double</code> message types,
 47 |  * specified in JSON format. Output graph has <code>long</code>
 48 |  * vertex IDs, but MST-specific vertex value, out-edge weight
 49 |  * and message types.
 50 |  */
 51 | public class JsonLongMSTVertexInputFormat extends
 52 |   TextVertexInputFormat<LongWritable, MSTVertexValue, MSTEdgeValue> {
 53 | 
 54 |   @Override
 55 |   public TextVertexReader createVertexReader(InputSplit split,
 56 |       TaskAttemptContext context) {
 57 |     return new JsonLongMSTVertexReader();
 58 |   }
 59 | 
 60 |  /**
 61 |   * VertexReader that features <code>MSTVertexValue</code> vertex
 62 |   * values and <code>MSTEdgeValue</code> out-edge weights. The
 63 |   * files should be in the following JSON format:
 64 |   * JSONArray(<vertex id>, <vertex value>,
 65 |   *   JSONArray(JSONArray(<dest vertex id>, <edge value>), ...))
 66 |   * Here is an example with vertex id 1, vertex value 4.3, and two edges.
 67 |   * First edge has a destination vertex 2, edge value 2.1.
 68 |   * Second edge has a destination vertex 3, edge value 0.7.
 69 |   * [1,4.3,[[2,2.1],[3,0.7]]]
 70 |   */
 71 |   class JsonLongMSTVertexReader extends
 72 |     TextVertexReaderFromEachLineProcessedHandlingExceptions<JSONArray,
 73 |     JSONException> {
 74 | 
 75 |     @Override
 76 |     protected JSONArray preprocessLine(Text line) throws JSONException {
 77 |       return new JSONArray(line.toString());
 78 |     }
 79 | 
 80 |     @Override
 81 |     protected LongWritable getId(JSONArray jsonVertex) throws JSONException,
 82 |               IOException {
 83 |       return new LongWritable(jsonVertex.getLong(0));
 84 |     }
 85 | 
 86 |     @Override
 87 |     protected MSTVertexValue getValue(JSONArray jsonVertex) throws
 88 |       JSONException, IOException {
 89 |       // ignore whatever is in jsonVertex, and return dummy MSTVertexValue
 90 |       // instead (this will be replaced during computation)
 91 |       return new MSTVertexValue();
 92 |     }
 93 | 
 94 |     @Override
 95 |     protected Iterable<Edge<LongWritable, MSTEdgeValue>> getEdges(
 96 |         JSONArray jsonVertex) throws JSONException, IOException {
 97 | 
 98 |       long src = jsonVertex.getLong(0);
 99 | 
100 |       JSONArray jsonEdgeArray = jsonVertex.getJSONArray(2);
101 |       List<Edge<LongWritable, MSTEdgeValue>> edges =
102 |           Lists.newArrayListWithCapacity(jsonEdgeArray.length());
103 | 
104 |       long dst;
105 |       double weight;
106 | 
107 |       for (int i = 0; i < jsonEdgeArray.length(); ++i) {
108 |         JSONArray jsonEdge = jsonEdgeArray.getJSONArray(i);
109 |         dst = jsonEdge.getLong(0);
110 |         weight = jsonEdge.getDouble(1);
111 | 
112 |         edges.add(EdgeFactory.create(new LongWritable(dst),
113 |                                      new MSTEdgeValue(weight, src, dst)));
114 |       }
115 |       return edges;
116 |     }
117 | 
118 |     @Override
119 |     protected Vertex<LongWritable, MSTVertexValue, MSTEdgeValue, MSTMessage>
120 |     handleException(Text line, JSONArray jsonVertex, JSONException e) {
121 |       throw new IllegalArgumentException(
122 |           "Couldn't get vertex from line " + line, e);
123 |     }
124 | 
125 |   }
126 | }
127 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/MinimumSpanningTreeInputFormat.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | 
 19 | package org.apache.giraph.examples;
 20 | 
 21 | import com.google.common.collect.Lists;
 22 | import org.apache.giraph.edge.Edge;
 23 | import org.apache.giraph.edge.EdgeFactory;
 24 | import org.apache.hadoop.io.LongWritable;
 25 | import org.apache.giraph.examples.MinimumSpanningTreeVertex.MSTVertexValue;
 26 | import org.apache.giraph.examples.MinimumSpanningTreeVertex.MSTEdgeValue;
 27 | import org.apache.hadoop.io.Text;
 28 | import org.apache.giraph.io.formats.TextVertexInputFormat;
 29 | import org.apache.hadoop.mapreduce.InputSplit;
 30 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 31 | 
 32 | import java.io.IOException;
 33 | import java.util.List;
 34 | import java.util.regex.Pattern;
 35 | 
 36 | /**
 37 |  * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for
 38 |  * {@link org.apache.giraph.examples.MinimumSpanningTreeVertex}.
 39 |  *
 40 |  * Inputs have long ids, double edge weights, and no vertex values.
 41 |  *
 42 |  * Each line consists of:
 43 |  * vertex neighbor1 neighbor1-weight neighbor2 neighbor2-weight ...
 44 |  *
 45 |  * Values can be separated by spaces or tabs.
 46 |  */
 47 | public class MinimumSpanningTreeInputFormat extends
 48 |     TextVertexInputFormat<LongWritable, MSTVertexValue, MSTEdgeValue> {
 49 |   /** Separator of the vertex and neighbors */
 50 |   private static final Pattern SEPARATOR = Pattern.compile("[\t ]");
 51 | 
 52 |   @Override
 53 |   public TextVertexReader createVertexReader(InputSplit split,
 54 |       TaskAttemptContext context)
 55 |     throws IOException {
 56 |     return new MinimumSpanningTreeVertexReader();
 57 |   }
 58 | 
 59 |   /**
 60 |    * Vertex reader associated with {@link MinimumSpanningTreeInputFormat}.
 61 |    */
 62 |   public class MinimumSpanningTreeVertexReader extends
 63 |     TextVertexReaderFromEachLineProcessed<String[]> {
 64 |     /**
 65 |      * Cached vertex id for the current line
 66 |      */
 67 |     private LongWritable id;
 68 | 
 69 |     @Override
 70 |     protected String[] preprocessLine(Text line) throws IOException {
 71 |       String[] tokens = SEPARATOR.split(line.toString());
 72 |       id = new LongWritable(Long.parseLong(tokens[0]));
 73 |       return tokens;
 74 |     }
 75 | 
 76 |     @Override
 77 |     protected LongWritable getId(String[] tokens) throws IOException {
 78 |       return id;
 79 |     }
 80 | 
 81 |     @Override
 82 |     protected MSTVertexValue getValue(String[] tokens) throws IOException {
 83 |       // ignore tokens and return dummy MSTVertexValue
 84 |       // (this will be replaced during computation)
 85 |       return new MSTVertexValue();
 86 |     }
 87 | 
 88 |     @Override
 89 |     protected Iterable<Edge<LongWritable, MSTEdgeValue>> getEdges(
 90 |         String[] tokens) throws IOException {
 91 | 
 92 |       // divide by 2, to account for edge weights
 93 |       List<Edge<LongWritable, MSTEdgeValue>> edges =
 94 |           Lists.newArrayListWithCapacity((tokens.length - 1) / 2);
 95 | 
 96 |       long src = id.get();
 97 |       long dst;
 98 |       double weight;
 99 | 
100 |       for (int i = 1; i < tokens.length - 1; i += 2) {
101 |         dst = Long.parseLong(tokens[i]);
102 |         weight = Double.parseDouble(tokens[i + 1]);
103 | 
104 |         edges.add(EdgeFactory.create(new LongWritable(dst),
105 |                                      new MSTEdgeValue(weight, src, dst)));
106 |       }
107 | 
108 |       return edges;
109 |     }
110 |   }
111 | }
112 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/PageRankTolFinderVertex.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | 
 19 | package org.apache.giraph.examples;
 20 | 
 21 | import java.io.IOException;
 22 | import org.apache.giraph.conf.IntConfOption;
 23 | import org.apache.giraph.aggregators.DoubleMaxAggregator;
 24 | import org.apache.giraph.graph.Vertex;
 25 | import org.apache.giraph.io.formats.TextVertexOutputFormat;
 26 | import org.apache.giraph.master.DefaultMasterCompute;
 27 | import org.apache.hadoop.io.DoubleWritable;
 28 | import org.apache.hadoop.io.NullWritable;
 29 | import org.apache.hadoop.io.LongWritable;
 30 | //import org.apache.hadoop.io.Text;
 31 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 32 | import org.apache.log4j.Logger;
 33 | 
 34 | /**
 35 |  * PageRank implementation that finds when the maximum error deltas
 36 |  * (between two supersteps) "plateaus".
 37 |  *
 38 |  * In other words, think of a plot of error-delta vs. superstep-number.
 39 |  * The goal is to determine when the function flattens out---this is
 40 |  * roughly where we should stop, as additional supersteps won't get
 41 |  * us any better of a convergence.
 42 |  *
 43 |  * As this "break even" point is different for different graphs, this
 44 |  * function helps determine what tolerance value should be used.
 45 |  */
 46 | @Algorithm(
 47 |     name = "PageRank Tolerance Finder"
 48 | )
 49 | public class PageRankTolFinderVertex extends Vertex<LongWritable,
 50 |     DoubleWritable, NullWritable, DoubleWritable> {
 51 |   /** Max number of supersteps */
 52 |   public static final IntConfOption MAX_SUPERSTEPS =
 53 |     new IntConfOption("PageRankTolFinderVertex.maxSS", 100);
 54 | 
 55 |   /** Logger */
 56 |   private static final Logger LOG =
 57 |       Logger.getLogger(PageRankTolFinderVertex.class);
 58 | 
 59 |   /** Max aggregator name */
 60 |   private static String MAX_AGG = "max";
 61 | 
 62 |   @Override
 63 |   public void compute(Iterable<DoubleWritable> messages) {
 64 |     // NOTE: We follow GraphLab's alternative way of computing PageRank,
 65 |     // which is to not divide by |V|. To get the probability value at
 66 |     // each vertex, take its PageRank value and divide by |V|.
 67 | 
 68 |     double oldVal = getValue().get();
 69 | 
 70 |     if (getSuperstep() == 0) {
 71 |       // FIX: initial value is 1/|V| (or 1), not 0.15/|V| (or 0.15)
 72 |       DoubleWritable vertexValue = new DoubleWritable(1.0);
 73 |       //new DoubleWritable(0.15f / getTotalNumVertices());
 74 |       setValue(vertexValue);
 75 | 
 76 |     } else {
 77 |       double sum = 0;
 78 |       for (DoubleWritable message : messages) {
 79 |         sum += message.get();
 80 |       }
 81 |       DoubleWritable vertexValue = new DoubleWritable(0.15f + 0.85f * sum);
 82 |       //new DoubleWritable((0.15f / getTotalNumVertices()) + 0.85f * sum);
 83 |       setValue(vertexValue);
 84 |     }
 85 | 
 86 |     aggregate(MAX_AGG,
 87 |               new DoubleWritable(Math.abs(oldVal - getValue().get())));
 88 | 
 89 |     // Termination condition based on max supersteps
 90 |     if (getSuperstep() < MAX_SUPERSTEPS.get(getConf())) {
 91 |       long edges = getNumEdges();
 92 |       sendMessageToAllEdges(new DoubleWritable(getValue().get() / edges));
 93 |     } else {
 94 |       voteToHalt();
 95 |     }
 96 |   }
 97 | 
 98 |   /**
 99 |    * Master compute associated with {@link PageRankTolFinderVertex}.
100 |    * It registers required aggregators.
101 |    */
102 |   public static class PageRankTolFinderVertexMasterCompute extends
103 |       DefaultMasterCompute {
104 |     @Override
105 |     public void initialize() throws InstantiationException,
106 |         IllegalAccessException {
107 |       registerAggregator(MAX_AGG, DoubleMaxAggregator.class);
108 |     }
109 | 
110 |     @Override
111 |     public void compute() {
112 |       // this is result of aggregators from the *previous* superstep
113 |       if (getSuperstep() > 0) {
114 |         LOG.info("SS " + (getSuperstep() - 1) + " max change: " +
115 |                  ((DoubleWritable) getAggregatedValue(MAX_AGG)).get());
116 |       }
117 |     }
118 |   }
119 | 
120 |   /**
121 |    * Simple VertexOutputFormat that supports {@link PageRankTolFinderVertex}
122 |    */
123 |   public static class PageRankTolFinderVertexOutputFormat extends
124 |       TextVertexOutputFormat<LongWritable, DoubleWritable, NullWritable> {
125 |     @Override
126 |     public TextVertexWriter createVertexWriter(TaskAttemptContext context)
127 |       throws IOException, InterruptedException {
128 |       return new PageRankTolFinderVertexWriter();
129 |     }
130 | 
131 |     /**
132 |      * Simple VertexWriter that supports {@link PageRankTolFinderVertex}
133 |      */
134 |     public class PageRankTolFinderVertexWriter extends TextVertexWriter {
135 |       @Override
136 |       public void writeVertex(
137 |           Vertex<LongWritable, DoubleWritable, NullWritable, ?> vertex)
138 |         throws IOException, InterruptedException {
139 |         // don't need to output anything---we don't care about results
140 |         //getRecordWriter().write(
141 |         //    new Text(vertex.getId().toString()),
142 |         //    new Text(vertex.getValue().toString()));
143 |       }
144 |     }
145 |   }
146 | }
147 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/SimplePageRankInputFormat.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | 
 19 | package org.apache.giraph.examples;
 20 | 
 21 | import com.google.common.collect.Lists;
 22 | import org.apache.giraph.edge.Edge;
 23 | import org.apache.giraph.edge.EdgeFactory;
 24 | import org.apache.hadoop.io.LongWritable;
 25 | import org.apache.hadoop.io.DoubleWritable;
 26 | import org.apache.hadoop.io.NullWritable;
 27 | import org.apache.hadoop.io.Text;
 28 | import org.apache.giraph.io.formats.TextVertexInputFormat;
 29 | import org.apache.hadoop.mapreduce.InputSplit;
 30 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 31 | 
 32 | import java.io.IOException;
 33 | import java.util.List;
 34 | import java.util.regex.Pattern;
 35 | 
 36 | /**
 37 |  * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for
 38 |  * {@link org.apache.giraph.examples.SimplePageRankVertex}.
 39 |  *
 40 |  * Inputs have long ids, no edge weights, and no vertex values.
 41 |  * (Vertex values are set to a double of 0.0.)
 42 |  *
 43 |  * Each line consists of:
 44 |  * vertex neighbor1 neighbor2 ...
 45 |  *
 46 |  * Values can be separated by spaces or tabs.
 47 |  */
 48 | public class SimplePageRankInputFormat extends
 49 |     TextVertexInputFormat<LongWritable, DoubleWritable, NullWritable> {
 50 |   /** Separator of the vertex and neighbors */
 51 |   private static final Pattern SEPARATOR = Pattern.compile("[\t ]");
 52 | 
 53 |   @Override
 54 |   public TextVertexReader createVertexReader(InputSplit split,
 55 |       TaskAttemptContext context)
 56 |     throws IOException {
 57 |     return new SimplePageRankVertexReader();
 58 |   }
 59 | 
 60 |   /**
 61 |    * Vertex reader associated with {@link SimplePageRankInputFormat}.
 62 |    */
 63 |   public class SimplePageRankVertexReader extends
 64 |     TextVertexReaderFromEachLineProcessed<String[]> {
 65 |     /**
 66 |      * Cached vertex id for the current line
 67 |      */
 68 |     private LongWritable id;
 69 | 
 70 |     @Override
 71 |     protected String[] preprocessLine(Text line) throws IOException {
 72 |       String[] tokens = SEPARATOR.split(line.toString());
 73 |       id = new LongWritable(Long.parseLong(tokens[0]));
 74 |       return tokens;
 75 |     }
 76 | 
 77 |     @Override
 78 |     protected LongWritable getId(String[] tokens) throws IOException {
 79 |       return id;
 80 |     }
 81 | 
 82 |     @Override
 83 |     protected DoubleWritable getValue(String[] tokens) throws IOException {
 84 |       return new DoubleWritable(0.0);
 85 |     }
 86 | 
 87 |     @Override
 88 |     protected Iterable<Edge<LongWritable, NullWritable>> getEdges(
 89 |         String[] tokens) throws IOException {
 90 |       List<Edge<LongWritable, NullWritable>> edges =
 91 |           Lists.newArrayListWithCapacity(tokens.length - 1);
 92 |       for (int i = 1; i < tokens.length; i++) {
 93 |         edges.add(EdgeFactory.create(
 94 |             new LongWritable(Long.parseLong(tokens[i]))));
 95 |       }
 96 |       return edges;
 97 |     }
 98 |   }
 99 | }
100 | 


--------------------------------------------------------------------------------
/giraph-1.0.0/giraph-examples/src/main/java/org/apache/giraph/examples/SimpleShortestPathsInputFormat.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *     http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing, software
 13 |  * distributed under the License is distributed on an "AS IS" BASIS,
 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 |  * See the License for the specific language governing permissions and
 16 |  * limitations under the License.
 17 |  */
 18 | 
 19 | package org.apache.giraph.examples;
 20 | 
 21 | import com.google.common.collect.Lists;
 22 | import org.apache.giraph.edge.Edge;
 23 | import org.apache.giraph.edge.EdgeFactory;
 24 | import org.apache.hadoop.io.LongWritable;
 25 | import org.apache.hadoop.io.DoubleWritable;
 26 | import org.apache.hadoop.io.FloatWritable;
 27 | import org.apache.hadoop.io.Text;
 28 | import org.apache.giraph.io.formats.TextVertexInputFormat;
 29 | import org.apache.hadoop.mapreduce.InputSplit;
 30 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 31 | 
 32 | import java.io.IOException;
 33 | import java.util.List;
 34 | import java.util.regex.Pattern;
 35 | 
 36 | /**
 37 |  * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for
 38 |  * {@link org.apache.giraph.examples.SimpleShortestPathsVertex}.
 39 |  *
 40 |  * Inputs have long ids, no edge weights, and no vertex values.
 41 |  * (Edge weights are set to a float of 1.0, vertex values are
 42 |  *  set to a double of 0.0.)
 43 |  *
 44 |  * Each line consists of:
 45 |  * vertex neighbor1 neighbor2 ...
 46 |  *
 47 |  * Values can be separated by spaces or tabs.
 48 |  */
 49 | public class SimpleShortestPathsInputFormat extends
 50 |     TextVertexInputFormat<LongWritable, DoubleWritable, FloatWritable> {
 51 |   /** Separator of the vertex and neighbors */
 52 |   private static final Pattern SEPARATOR = Pattern.compile("[\t ]");
 53 | 
 54 |   @Override
 55 |   public TextVertexReader createVertexReader(InputSplit split,
 56 |       TaskAttemptContext context)
 57 |     throws IOException {
 58 |     return new SimpleShortestPathsVertexReader();
 59 |   }
 60 | 
 61 |   /**
 62 |    * Vertex reader associated with {@link SimpleShortestPathsInputFormat}.
 63 |    */
 64 |   public class SimpleShortestPathsVertexReader extends
 65 |     TextVertexReaderFromEachLineProcessed<String[]> {
 66 |     /**
 67 |      * Cached vertex id for the current line
 68 |      */
 69 |     private LongWritable id;
 70 | 
 71 |     @Override
 72 |     protected String[] preprocessLine(Text line) throws IOException {
 73 |       String[] tokens = SEPARATOR.split(line.toString());
 74 |       id = new LongWritable(Long.parseLong(tokens[0]));
 75 |       return tokens;
 76 |     }
 77 | 
 78 |     @Override
 79 |     protected LongWritable getId(String[] tokens) throws IOException {
 80 |       return id;
 81 |     }
 82 | 
 83 |     @Override
 84 |     protected DoubleWritable getValue(String[] tokens) throws IOException {
 85 |       return new DoubleWritable(0.0);
 86 |     }
 87 | 
 88 |     @Override
 89 |     protected Iterable<Edge<LongWritable, FloatWritable>> getEdges(
 90 |         String[] tokens) throws IOException {
 91 |       List<Edge<LongWritable, FloatWritable>> edges =
 92 |           Lists.newArrayListWithCapacity(tokens.length - 1);
 93 |       for (int i = 1; i < tokens.length; i++) {
 94 |         edges.add(EdgeFactory.create(
 95 |               new LongWritable(Long.parseLong(tokens[i])),
 96 |               new FloatWritable((float) 1.0)));
 97 |       }
 98 |       return edges;
 99 |     }
100 |   }
101 | }
102 | 


--------------------------------------------------------------------------------
/gps-rev-110/local-master-scripts/make_gps_node_runner_jar.sh:
--------------------------------------------------------------------------------
 1 | cd .. 
 2 | GPS_DIR="`pwd`"
 3 | GPS_SRC_DIR=${GPS_DIR}/src
 4 | GPS_CLASSES_DIR=${GPS_DIR}/classes
 5 | LIBS_DIR=${GPS_DIR}/libs
 6 | 
 7 | echo "removing ${GPS_DIR}/gps_node_runner.jar"
 8 | rm ${GPS_DIR}/gps_node_runner.jar
 9 | 
10 | echo "removing ${GPS_CLASSES_DIR}"
11 | rm -rf ${GPS_CLASSES_DIR}
12 | 
13 | echo "making ${GPS_CLASSES_DIR}"
14 | mkdir ${GPS_CLASSES_DIR}
15 | 
16 | echo "cding into ${GPS_SRC_DIR}"
17 | cd ${GPS_SRC_DIR}
18 | 
19 | find java/gps/examples -name \*.java -print > file.list
20 | # TODO: bug fix? works without "echo" so this line might be old code
21 | #$GPS_SRC_DIR/java/gps/node/GPSNodeRunner.java >> file.list
22 | echo "compiling GPSNodeRunner to classes directory"
23 | #javac -verbose \
24 | javac \
25 | -cp $LIBS_DIR/asm-3.3.1.jar:$LIBS_DIR/guava-r08.jar:$LIBS_DIR/objenesis-1.2.jar:$LIBS_DIR/cglib-2.2.jar:$LIBS_DIR/commons-cli-1.2.jar:$LIBS_DIR/jline-0.9.94.jar:$LIBS_DIR/log4j-1.2.15.jar:$LIBS_DIR/commons-logging-1.1.1.jar:$LIBS_DIR/hadoop-core-1.0.4.jar:$LIBS_DIR/commons-collections-3.2.1.jar:$LIBS_DIR/commons-lang-2.4.jar:$LIBS_DIR/commons-configuration-1.6.jar:$LIBS_DIR/tools.jar:$LIBS_DIR/mina-core-2.0.3.jar:$LIBS_DIR/mina-example-2.0.3.jar:$LIBS_DIR/slf4j-api-1.6.1.jar:$LIBS_DIR/colt.jar:$LIBS_DIR/concurrent.jar:$GPS_SRC_DIR/java \
26 | -d ${GPS_CLASSES_DIR} \
27 | @file.list
28 | 
29 | echo "cding into ${GPS_CLASSES_DIR}"
30 | cd ${GPS_CLASSES_DIR}
31 | pwd
32 | echo "making gps_node_runner.jar..."
33 | #jar -cmvf $GPS_DIR/local-master-scripts/manifest.txt ../gps_node_runner.jar gps/
34 | jar -cmf $GPS_DIR/local-master-scripts/manifest.txt ../gps_node_runner.jar gps/


--------------------------------------------------------------------------------
/gps-rev-110/src/java/gps/examples/dimest/DiameterEstimationVertex.java:
--------------------------------------------------------------------------------
  1 | package gps.examples.dimest;
  2 | 
  3 | import org.apache.commons.cli.CommandLine;
  4 | 
  5 | import gps.graph.NullEdgeVertex;
  6 | import gps.graph.NullEdgeVertexFactory;
  7 | import gps.node.GPSJobConfiguration;
  8 | import gps.node.GPSNodeRunner;
  9 | import gps.writable.LongArrayWritable;
 10 | 
 11 | 
 12 | import java.util.Arrays;
 13 | 
 14 | 
 15 | /**
 16 |  * GPS implementation of Flajolet-Martin diameter estimation.
 17 |  *
 18 |  * @author Young
 19 |  */
 20 | public class DiameterEstimationVertex extends NullEdgeVertex<LongArrayWritable, LongArrayWritable> {
 21 | 
 22 |   public static int DEFAULT_NUM_MAX_ITERATIONS = 30;
 23 |   public static int numMaxIterations;
 24 | 
 25 |   /** K is number of bitstrings to use,
 26 |       larger K = more concentrated estimate **/
 27 |   public static final int K = 8;
 28 | 
 29 |   /** Bit shift constant **/
 30 |   private static final int V62 = 62;
 31 |   /** Bit shift constant **/
 32 |   private static final int V1 = 1;
 33 | 
 34 |   public DiameterEstimationVertex(CommandLine line) {
 35 |     String otherOptsStr = line.getOptionValue(GPSNodeRunner.OTHER_OPTS_OPT_NAME);
 36 |     System.out.println("otherOptsStr: " + otherOptsStr);
 37 |     numMaxIterations = DEFAULT_NUM_MAX_ITERATIONS;
 38 |     if (otherOptsStr != null) {
 39 |       String[] split = otherOptsStr.split("###");
 40 |       for (int index = 0; index < split.length; ) {
 41 |         String flag = split[index++];
 42 |         String value = split[index++];
 43 |         if ("-max".equals(flag)) {
 44 |           numMaxIterations = Integer.parseInt(value);
 45 |           System.out.println("numMaxIterations: " + numMaxIterations);
 46 |         }
 47 |       }
 48 |     }
 49 |   }
 50 | 
 51 |   @Override
 52 |   public void compute(Iterable<LongArrayWritable> incomingMessages, int superstepNo) {
 53 |     if (superstepNo == 1) {
 54 |       long[] value = new long[K];
 55 |       int finalBitCount = 63;
 56 |       long rndVal = 0;
 57 | 
 58 |       for (int j = 0; j < value.length; j++) {
 59 |         rndVal = createRandomBM(finalBitCount);
 60 |         value[j] = V1 << (V62 - rndVal);
 61 |       }
 62 | 
 63 |       LongArrayWritable arr = new LongArrayWritable(value);
 64 |       sendMessages(getNeighborIds(), arr);
 65 |       setValue(arr);
 66 | 
 67 |       //System.out.println(getId() + ": done superstep 1... " + getValue());
 68 |       return;
 69 |     }
 70 | 
 71 |     //System.out.println(getId() + ": normal superstep... " + getValue());
 72 | 
 73 |     // get direct reference to vertex value's array
 74 |     long[] newBitmask = getValue().get();
 75 | 
 76 |     // Some vertices have in-edges but no out-edges, so they're NOT
 77 |     // listed in the input graphs (from SNAP). This causes a new
 78 |     // vertex to be added during the 2nd superstep, and its value
 79 |     // to be non-initialized (i.e., empty array []). Since such
 80 |     // vertices have no out-edges, we can just halt.
 81 |     if (newBitmask.length == 0) {
 82 |       voteToHalt();
 83 |       return;
 84 |     }
 85 | 
 86 |     boolean isChanged = false;
 87 |     long[] tmpBitmask;
 88 |     long tmp;
 89 | 
 90 |     for (LongArrayWritable message : incomingMessages) {
 91 |       tmpBitmask = message.get();
 92 |       
 93 | //      if (tmpBitmask.length == 0) {
 94 | //        System.out.println(getId() + ": got empty message??");
 95 | //      } else {
 96 | //        System.out.println(getId() + ": got " + message);
 97 | //      }
 98 | 
 99 |       // both arrays are of length K
100 |       for (int i = 0; i < K; i++) {
101 |         tmp = newBitmask[i];      // store old value
102 | 
103 |         // NOTE: this modifies vertex value directly
104 |         newBitmask[i] = newBitmask[i] | tmpBitmask[i];
105 | 
106 |         // check if there's a change
107 |         // NOTE: unused for now---to terminate when all vertices converge,
108 |         // use an aggregator to track # of vertices that have finished
109 |         //isChanged = isChanged || (tmp != newBitmask[i]);
110 |       }
111 |     }
112 | 
113 |     //System.out.println(getId() + ": final array is " + getValue());
114 | 
115 |     // WARNING: we cannot terminate based on LOCAL steady state,
116 |     // we need all vertices computing until the very end
117 |     if (superstepNo >= numMaxIterations) {
118 |       //System.out.println(getId() + ": voting to halt");
119 |       voteToHalt();
120 | 
121 |     } else {
122 |       //System.out.println(getId() + ": not halting... sending message");
123 | 
124 |       // otherwise, send our neighbours our bitstrings
125 |       sendMessages(getNeighborIds(), getValue());
126 |     }
127 |   }
128 | 
129 |   // Source: Mizan, which took this from Pegasus
130 |   /**
131 |    * Creates random bitstring.
132 |    *
133 |    * @param sizeBitmask Number of bits.
134 |    * @return Random bit index.
135 |    */
136 |   private int createRandomBM(int sizeBitmask) {
137 |     int j;
138 | 
139 |     // random() gives double in [0,1)---just like in Mizan
140 |     // NOTE: we use the default seed set by java.util.Random()
141 |     double curRandom = Math.random();
142 |     double threshold = 0;
143 | 
144 |     for (j = 0; j < sizeBitmask - 1; j++) {
145 |       threshold += Math.pow(2.0, -1.0 * j - 1.0);
146 | 
147 |       if (curRandom < threshold) {
148 |         break;
149 |       }
150 |     }
151 | 
152 |     return j;
153 |   }
154 | 
155 |   @Override
156 |   public LongArrayWritable getInitialValue(int id) {
157 |     return new LongArrayWritable();
158 |   }
159 | 
160 |   /**
161 |    * Factory class for {@link DiameterEstimationVertex}.
162 |    *
163 |    * @author Young
164 |    */
165 |   public static class DiameterEstimationVertexFactory extends NullEdgeVertexFactory<LongArrayWritable, LongArrayWritable> {
166 | 
167 |     @Override
168 |     public NullEdgeVertex<LongArrayWritable, LongArrayWritable> newInstance(CommandLine commandLine) {
169 |       return new DiameterEstimationVertex(commandLine);
170 |     }
171 |   }
172 | 
173 |   public static class JobConfiguration extends GPSJobConfiguration {
174 | 
175 |     @Override
176 |     public Class<?> getVertexFactoryClass() {
177 |       return DiameterEstimationVertexFactory.class;
178 |     }
179 | 
180 |     @Override
181 |     public Class<?> getVertexClass() {
182 |       return DiameterEstimationVertex.class;
183 |     }
184 | 
185 |     @Override
186 |     public Class<?> getVertexValueClass() {
187 |       return LongArrayWritable.class;
188 |     }
189 | 
190 |     @Override
191 |     public Class<?> getMessageValueClass() {
192 |       return LongArrayWritable.class;
193 |     }
194 |   }
195 | }
196 | 


--------------------------------------------------------------------------------
/gps-rev-110/src/java/gps/examples/pagerank/PageRankVertex.java:
--------------------------------------------------------------------------------
  1 | package gps.examples.pagerank;
  2 | 
  3 | import org.apache.commons.cli.CommandLine;
  4 | 
  5 | import gps.globalobjects.BooleanANDGlobalObject;
  6 | import gps.globalobjects.DoubleMaxGlobalObject;
  7 | import gps.globalobjects.DoubleSumGlobalObject;
  8 | import gps.globalobjects.FloatSumGlobalObject;
  9 | import gps.globalobjects.GlobalObjectsMap;
 10 | import gps.globalobjects.IntMaxGlobalObject;
 11 | import gps.globalobjects.IntSumGlobalObject;
 12 | import gps.globalobjects.LongSumGlobalObject;
 13 | import gps.graph.NullEdgeVertex;
 14 | import gps.graph.NullEdgeVertexFactory;
 15 | import gps.node.GPSJobConfiguration;
 16 | import gps.node.GPSNodeRunner;
 17 | import gps.writable.DoubleWritable;
 18 | 
 19 | /**
 20 |  * GPS implementation of PageRank algorithm.
 21 |  *
 22 |  * @author semihsalihoglu
 23 |  */
 24 | public class PageRankVertex extends NullEdgeVertex<DoubleWritable, DoubleWritable> {
 25 | 
 26 |   public static int DEFAULT_NUM_MAX_ITERATIONS = 30;
 27 |   public static int numMaxIterations;
 28 | 
 29 |   public PageRankVertex(CommandLine line) {
 30 |     String otherOptsStr = line.getOptionValue(GPSNodeRunner.OTHER_OPTS_OPT_NAME);
 31 |     System.out.println("otherOptsStr: " + otherOptsStr);
 32 | 
 33 |     numMaxIterations = DEFAULT_NUM_MAX_ITERATIONS;
 34 | 
 35 |     if (otherOptsStr != null) {
 36 |       String[] split = otherOptsStr.split("###");
 37 | 
 38 |       for (int index = 0; index < split.length; ) {
 39 |         String flag = split[index++];
 40 |         String value = split[index++];
 41 | 
 42 |         if ("-max".equals(flag)) {
 43 |           numMaxIterations = Integer.parseInt(value);
 44 |           System.out.println("numMaxIterations: " + numMaxIterations);
 45 |         }
 46 |       }
 47 |     }
 48 |   }
 49 | 
 50 |   @Override
 51 |   public void compute(Iterable<DoubleWritable> incomingMessages, int superstepNo) {
 52 |     // NOTE: We follow GraphLab's alternative way of computing PageRank,
 53 |     // which is to not divide by |V|. To get the probability value at
 54 |     // each vertex, take its PageRank value and divide by |V|.
 55 | 
 56 |     //int numVertices = ((IntSumGlobalObject) getGlobalObjectsMap().getGlobalObject(
 57 |     //  GlobalObjectsMap.NUM_TOTAL_VERTICES)).getValue().getValue();
 58 | 
 59 |     if (superstepNo == 1) {
 60 |       setValue(new DoubleWritable(1.0));
 61 |       //setValue(new DoubleWritable((double) 1 / (double) numVertices));
 62 |       sendMessages(getNeighborIds(), getValue());
 63 |       return;
 64 |     }
 65 | 
 66 |     double oldVal = getValue().getValue();
 67 |     double sum = 0.0;
 68 |     for (DoubleWritable messageValue : incomingMessages) {
 69 |       sum += messageValue.getValue();
 70 |     }
 71 | 
 72 |     double currentState = 0.85 * sum  + 0.15;
 73 |     //double currentState = 0.85 * sum/getNeighborIds().length  + 0.15 / (double) numVertices;
 74 | 
 75 |     setValue(new DoubleWritable(currentState));
 76 | 
 77 |     // Termination condition based on max supersteps
 78 |     int[] neighborIds = getNeighborIds();
 79 |     // FIX: divide by sender's out-degree rather than receiver's out-degree
 80 |     // (i.e., don't do "currentState = 0.85 * sum / neighborIds.length + ...")
 81 |     DoubleWritable messageValue = new DoubleWritable(currentState / neighborIds.length);
 82 |     sendMessages(neighborIds, messageValue);
 83 |     
 84 |     if (superstepNo == numMaxIterations) {
 85 |       voteToHalt();
 86 |     }
 87 |   }
 88 | 
 89 |   @Override
 90 |   public DoubleWritable getInitialValue(int id) {
 91 |     return new DoubleWritable(0.1);
 92 |   }
 93 | 
 94 |   /**
 95 |    * Factory class for {@link PageRankVertex}.
 96 |    *
 97 |    * @author semihsalihoglu
 98 |    */
 99 |   public static class PageRankVertexFactory extends NullEdgeVertexFactory<DoubleWritable, DoubleWritable> {
100 | 
101 |     @Override
102 |     public NullEdgeVertex<DoubleWritable, DoubleWritable> newInstance(CommandLine commandLine) {
103 |       return new PageRankVertex(commandLine);
104 |     }
105 |   }
106 | 
107 |   public static class JobConfiguration extends GPSJobConfiguration {
108 | 
109 |     @Override
110 |     public Class<?> getVertexFactoryClass() {
111 |       return PageRankVertexFactory.class;
112 |     }
113 | 
114 |     @Override
115 |     public Class<?> getVertexClass() {
116 |       return PageRankVertex.class;
117 |     }
118 | 
119 |     @Override
120 |     public Class<?> getVertexValueClass() {
121 |       return DoubleWritable.class;
122 |     }
123 | 
124 |     @Override
125 |     public Class<?> getMessageValueClass() {
126 |       return DoubleWritable.class;
127 |     }
128 |   }
129 | }
130 | 


--------------------------------------------------------------------------------
/gps-rev-110/src/java/gps/examples/sssp/SSSPVertex.java:
--------------------------------------------------------------------------------
  1 | package gps.examples.sssp;
  2 | 
  3 | import org.apache.commons.cli.CommandLine;
  4 | 
  5 | import gps.graph.NullEdgeVertex;
  6 | import gps.graph.NullEdgeVertexFactory;
  7 | import gps.node.GPSJobConfiguration;
  8 | import gps.node.GPSNodeRunner;
  9 | import gps.writable.IntWritable;
 10 | 
 11 | // NOTE: this is different from SingleSourceAllVerticesShortestPathVertex,
 12 | // in that we don't use the boolean shortcut method.
 13 | //
 14 | // Instead, this is a modification of gps.examples.edgevaluesssp.EdgeValueSSSPVertex,
 15 | // where edge values are all 1. This matches the implementations in Giraph and GPS.
 16 | public class SSSPVertex extends NullEdgeVertex<IntWritable, IntWritable> {
 17 | 
 18 | 	private static int DEFAULT_SOURCE_ID = 0;
 19 | 	private int sourceId;
 20 | 	public SSSPVertex() {
 21 | 	}
 22 | 	
 23 | 	public SSSPVertex(CommandLine line) {
 24 | 		String otherOptsStr = line.getOptionValue(GPSNodeRunner.OTHER_OPTS_OPT_NAME);
 25 | 		System.out.println("otherOptsStr: " + otherOptsStr);
 26 | 		sourceId = DEFAULT_SOURCE_ID;
 27 | 		if (otherOptsStr != null) {
 28 | 			String[] split = otherOptsStr.split("###");
 29 | 			for (int index = 0; index < split.length; ) {
 30 | 				String flag = split[index++];
 31 | 				String value = split[index++];
 32 | 				if ("-root".equals(flag)) {
 33 | 					sourceId = Integer.parseInt(value);
 34 | 					System.out.println("sourceId: " + sourceId);
 35 | 				}
 36 | 			}
 37 | 		}
 38 | 	}
 39 | 
 40 | 	@Override
 41 | 	public void compute(Iterable<IntWritable> messageValues, int superstepNo) {
 42 | 		int previousDistance = getValue().getValue();
 43 | 		if (superstepNo == 1) {
 44 | 			if (previousDistance == Integer.MAX_VALUE) {
 45 | 				voteToHalt();
 46 | 			} else {
 47 | 				sendMessages(getNeighborIds(),
 48 |                      new IntWritable(getValue().getValue() + 1));
 49 | 			}
 50 | 		} else {
 51 | 			int minValue = previousDistance;
 52 | 			int messageValueInt;
 53 | 			for (IntWritable messageValue : messageValues) {
 54 | 				messageValueInt = messageValue.getValue();
 55 | 				if (messageValueInt < minValue) {
 56 | 					minValue = messageValueInt;
 57 | 				}
 58 | 			}
 59 | 			int currentDistance = minValue;
 60 | 			if (currentDistance < previousDistance) {
 61 | 				IntWritable newState = new IntWritable(currentDistance);
 62 | 				setValue(newState);
 63 | 				sendMessages(getNeighborIds(),
 64 |                      new IntWritable(getValue().getValue() + 1));
 65 | 			} else {
 66 | 				voteToHalt();
 67 | 			}
 68 | 		}
 69 | 	}
 70 | 
 71 | 	@Override
 72 | 	public IntWritable getInitialValue(int id) {
 73 | 		return id == sourceId ? new IntWritable(0) : new IntWritable(Integer.MAX_VALUE);
 74 | 	}
 75 | 
 76 | 	/**
 77 | 	 * Factory class for {@link SSSPVertex}.
 78 | 	 * 
 79 | 	 * @author semihsalihoglu
 80 | 	 */
 81 | 	public static class SSSPVertexFactory
 82 | 		extends NullEdgeVertexFactory<IntWritable, IntWritable> {
 83 |  
 84 | 		@Override
 85 | 		public NullEdgeVertex<IntWritable, IntWritable> newInstance(CommandLine commandLine) {
 86 | 			return new SSSPVertex(commandLine);
 87 | 		}
 88 | 	}
 89 | 	
 90 | 	public static class JobConfiguration extends GPSJobConfiguration {
 91 | 
 92 | 		@Override
 93 | 		public Class<?> getVertexFactoryClass() {
 94 | 			return SSSPVertexFactory.class;
 95 | 		}
 96 | 
 97 | 		@Override
 98 | 		public Class<?> getVertexClass() {
 99 | 			return SSSPVertex.class;
100 | 		}
101 | 
102 | 		@Override
103 | 		public Class<?> getVertexValueClass() {
104 | 			return IntWritable.class;
105 | 		}
106 | 
107 | 		@Override
108 | 		public Class<?> getMessageValueClass() {
109 | 			return IntWritable.class;
110 | 		}
111 | 	}
112 | }
113 | 


--------------------------------------------------------------------------------
/gps-rev-110/src/java/gps/examples/sssp/SingleSourceAllVerticesShortestPathVertex.java:
--------------------------------------------------------------------------------
  1 | package gps.examples.sssp;
  2 | 
  3 | import org.apache.commons.cli.CommandLine;
  4 | 
  5 | import gps.graph.NullEdgeVertex;
  6 | import gps.graph.NullEdgeVertexFactory;
  7 | import gps.node.GPSJobConfiguration;
  8 | import gps.node.GPSNodeRunner;
  9 | import gps.writable.BooleanWritable;
 10 | import gps.writable.IntWritable;
 11 | 
 12 | public class SingleSourceAllVerticesShortestPathVertex extends NullEdgeVertex<IntWritable, BooleanWritable> {
 13 | 
 14 | 	private static int DEFAULT_ROOT_ID = 0;
 15 | 	private int root;
 16 | 	protected boolean isFLPS = false;
 17 | 	protected IntWritable numRecentlyUpdatedVertices;
 18 | 
 19 | 	public SingleSourceAllVerticesShortestPathVertex(CommandLine line) {
 20 | 		String otherOptsStr = line.getOptionValue(GPSNodeRunner.OTHER_OPTS_OPT_NAME);
 21 | 		System.out.println("otherOptsStr: " + otherOptsStr);
 22 | 		root = DEFAULT_ROOT_ID;
 23 | 		if (otherOptsStr != null) {
 24 | 			String[] split = otherOptsStr.split("###");
 25 | 			for (int index = 0; index < split.length; ) {
 26 | 				String flag = split[index++];
 27 | 				String value = split[index++];
 28 | 				if ("-root".equals(flag)) {
 29 | 					root = Integer.parseInt(value);
 30 | 					System.out.println("sourceId: " + root);
 31 | 				}
 32 | 			}
 33 | 		}
 34 | 	}
 35 | 
 36 | 	@Override
 37 | 	public void compute(Iterable<BooleanWritable> messageValues, int superstepNo) {
 38 | 		performRegularLabelPropagation(messageValues, superstepNo);
 39 | 	}
 40 | 
 41 | 	protected void performRegularLabelPropagation(Iterable<BooleanWritable> messageValues, int superstepNo) {
 42 | 		int previousDistance = getValue().getValue();
 43 | 		if (superstepNo == 1) {
 44 | 			if (previousDistance == Integer.MAX_VALUE) {
 45 | 				if (!isFLPS) {
 46 | 					voteToHalt();
 47 | 				}
 48 | 			} else {
 49 | 				sendMessages(getNeighborIds(), new BooleanWritable());
 50 | 				if (isFLPS) {
 51 | 					numRecentlyUpdatedVertices.value++;
 52 | 					voteToHalt();
 53 | 				}
 54 | 			}
 55 | 		} else {
 56 | 			if (previousDistance != Integer.MAX_VALUE) {
 57 | 				if (!isFLPS) {
 58 | 					voteToHalt();
 59 | 				}
 60 | 			} else if (messageValues.iterator().hasNext()) {
 61 |         // BUGFIX: distance 1 will occur at superstep 2, so *subtract* 1
 62 | 				setValue(new IntWritable(superstepNo - 1));
 63 | 				sendMessages(getNeighborIds(), new BooleanWritable());
 64 | 				if (isFLPS) {
 65 | 					numRecentlyUpdatedVertices.value++;
 66 | 					voteToHalt();
 67 | 				}
 68 | 			}
 69 | 		}
 70 | 	}
 71 | 
 72 | 	@Override
 73 | 	public IntWritable getInitialValue(int id) {
 74 | 		return id == root ? new IntWritable(0) : new IntWritable(Integer.MAX_VALUE);
 75 | 	}
 76 | 	
 77 | 	/**
 78 | 	 * Factory class for {@link SingleSourceAllVerticesShortestPathVertex}.
 79 | 	 * 
 80 | 	 * @author semihsalihoglu
 81 | 	 */
 82 | 	public static class SingleSourceAllVerticesShortestPathVertexFactory extends NullEdgeVertexFactory<IntWritable, BooleanWritable> {
 83 | 
 84 | 		@Override
 85 | 		public NullEdgeVertex<IntWritable, BooleanWritable> newInstance(CommandLine commandLine) {
 86 | 			return new SingleSourceAllVerticesShortestPathVertex(commandLine);
 87 | 		}
 88 | 	}
 89 | 
 90 | 	public static class JobConfiguration extends GPSJobConfiguration {
 91 | 
 92 | 		@Override
 93 | 		public Class<?> getVertexFactoryClass() {
 94 | 			return SingleSourceAllVerticesShortestPathVertexFactory.class;
 95 | 		}
 96 | 
 97 | 		@Override
 98 | 		public Class<?> getVertexClass() {
 99 | 			return SingleSourceAllVerticesShortestPathVertex.class;
100 | 		}
101 | 
102 | 		@Override
103 | 		public Class<?> getVertexValueClass() {
104 | 			return IntWritable.class;
105 | 		}
106 | 
107 | 		@Override
108 | 		public Class<?> getMessageValueClass() {
109 | 			return BooleanWritable.class;
110 | 		}
111 | 	}
112 | }


--------------------------------------------------------------------------------
/gps-rev-110/src/java/gps/examples/wcc/WeaklyConnectedComponentsVertex.java:
--------------------------------------------------------------------------------
 1 | package gps.examples.wcc;
 2 | 
 3 | import org.apache.commons.cli.CommandLine;
 4 | 
 5 | import gps.graph.NullEdgeVertex;
 6 | import gps.graph.NullEdgeVertexFactory;
 7 | import gps.node.GPSJobConfiguration;
 8 | import gps.node.GPSNodeRunner;
 9 | import gps.writable.IntWritable;
10 | 
11 | public class WeaklyConnectedComponentsVertex extends NullEdgeVertex<IntWritable, IntWritable>{
12 | 
13 | 	private int minValue;
14 | 	//public static int DEFAULT_NUM_MAX_ITERATIONS = 999;
15 | 	public static int numMaxIterations;
16 | 	public WeaklyConnectedComponentsVertex(CommandLine line) {
17 | 		//String otherOptsStr = line.getOptionValue(GPSNodeRunner.OTHER_OPTS_OPT_NAME);
18 | 		//System.out.println("otherOptsStr: " + otherOptsStr);
19 | 		//numMaxIterations = DEFAULT_NUM_MAX_ITERATIONS;
20 | 		//if (otherOptsStr != null) {
21 | 		//  String[] split = otherOptsStr.split("###");
22 | 		//  for (int index = 0; index < split.length; ) {
23 | 		//  	String flag = split[index++];
24 | 		//  	String value = split[index++];
25 | 		//  	if ("-nmi".equals(flag)) {
26 | 		//  		numMaxIterations = Integer.parseInt(value);
27 | 		//  		System.out.println("numMaxIterations: " + numMaxIterations);
28 | 		//  	}
29 | 		//  }
30 | 		//}
31 | 	}
32 | 	@Override
33 | 	public void compute(Iterable<IntWritable> messageValues, int superstepNo) {
34 | 		if (superstepNo == 1) {
35 | 			setValue(new IntWritable(getId()));
36 | 			sendMessages(getNeighborIds(), getValue());
37 | 		} else {
38 | 			minValue = getValue().getValue();
39 | 			for (IntWritable message : messageValues) {
40 | 				if (message.getValue() < minValue) {
41 | 					minValue = message.getValue();
42 | 				}
43 | 			}
44 | 			if (minValue < getValue().getValue()) {
45 | 				setValue(new IntWritable(minValue));
46 | 				sendMessages(getNeighborIds(), getValue());
47 | 			} else {
48 | 				voteToHalt();
49 | 			}
50 | 
51 |       // No superstep termination conditions---run to completion instead
52 | 			//if (superstepNo == numMaxIterations) {
53 | 			//	voteToHalt();
54 | 			//}
55 | 		}
56 | 	}
57 | 
58 | 	@Override
59 | 	public IntWritable getInitialValue(int id) {
60 | 		return new IntWritable(getId());
61 | 	}
62 | 	
63 | 	public static class WeaklyConnectedComponentsVertexFactory extends
64 | 		NullEdgeVertexFactory<IntWritable, IntWritable> {
65 | 
66 | 		@Override
67 | 		public NullEdgeVertex<IntWritable, IntWritable> newInstance(CommandLine commandline) {
68 | 			return new WeaklyConnectedComponentsVertex(commandline);
69 | 		}
70 | 	}
71 | 
72 | 	public static class JobConfiguration extends GPSJobConfiguration {
73 | 
74 | 		@Override
75 | 		public Class<?> getVertexFactoryClass() {
76 | 			return WeaklyConnectedComponentsVertexFactory.class;
77 | 		}
78 | 
79 | 		@Override
80 | 		public Class<?> getVertexClass() {
81 | 			return WeaklyConnectedComponentsVertex.class;
82 | 		}
83 | 
84 | 		@Override
85 | 		public Class<?> getVertexValueClass() {
86 | 			return IntWritable.class;
87 | 		}
88 | 
89 | 		@Override
90 | 		public Class<?> getMessageValueClass() {
91 | 			return IntWritable.class;
92 | 		}
93 | 	}
94 | }
95 | 


--------------------------------------------------------------------------------
/gps-rev-110/src/java/gps/node/worker/dynamic/VertexWrapper.java:
--------------------------------------------------------------------------------
 1 | package gps.node.worker.dynamic;
 2 | 
 3 | import gps.writable.MinaWritable;
 4 | 
 5 | public class VertexWrapper<V extends MinaWritable> {
 6 | 	public int originalId;
 7 | 	public int[] neighborIds;
 8 | 	public V state;
 9 | 	public boolean isActive;
10 | 	public int toOrFromMachineId;
11 | }
12 | 


--------------------------------------------------------------------------------
/gps-rev-110/src/java/gps/node/worker/dynamic/greedy/BaseGreedyDynamicGPSWorkerImpl.java:
--------------------------------------------------------------------------------
  1 | package gps.node.worker.dynamic.greedy;
  2 | 
  3 | import static gps.node.worker.GPSWorkerExposedGlobalVariables.*;
  4 | import gps.communication.MessageSenderAndReceiverFactory;
  5 | import gps.graph.Graph;
  6 | import gps.graph.VertexFactory;
  7 | import gps.messages.storage.ArrayBackedIncomingMessageStorage;
  8 | import gps.node.GPSJobConfiguration;
  9 | import gps.node.MachineConfig;
 10 | import gps.node.worker.AbstractGPSWorker;
 11 | import gps.writable.MinaWritable;
 12 | import gps.writable.NullWritable;
 13 | 
 14 | import org.apache.commons.cli.CommandLine;
 15 | import org.apache.hadoop.fs.FileSystem;
 16 | 
 17 | public abstract class BaseGreedyDynamicGPSWorkerImpl<V extends MinaWritable,
 18 | 	E extends MinaWritable, M extends MinaWritable> extends AbstractGPSWorker<V, E, M> {
 19 | 
 20 | 	public static int[] machineCommunicationHistogram;
 21 | 	protected boolean[] fasterMachines;
 22 | 	protected final int edgeThreshold;
 23 | 	protected int benefitThreshold;
 24 | 	protected int superstepNoToStopDynamism;
 25 | 
 26 | 	public BaseGreedyDynamicGPSWorkerImpl(int localMachineId, CommandLine commandLine,
 27 | 		FileSystem fileSystem, MachineConfig machineConfig, Graph<V, E> graphPartition,
 28 | 		VertexFactory<V, E, M> vertexFactory, int graphSize, int outgoingBufferSizes,
 29 | 		String outputFileName, MessageSenderAndReceiverFactory messageSenderAndReceiverFactory,
 30 | 		ArrayBackedIncomingMessageStorage<M> incomingMessageStorage, int benefitThreshold,
 31 | 		int edgeThreshold, long pollingTime, int maxMessagesToTransmitConcurrently,
 32 | 		int numVerticesFrequencyToCheckOutgoingBuffers,
 33 | 		int sleepTimeWhenOutgoingBuffersExceedThreshold,
 34 | 		int largeVertexPartitioningOutdegreeThreshold, boolean runPartitioningSuperstep,
 35 | 		boolean combine, Class<M> messageRepresentativeInstance,
 36 | 		Class<E> representativeEdgeInstance, GPSJobConfiguration jobConfiguration,
 37 | 		int numProcessorsForHandlingIO, int superstepNoToStopDynamism) {
 38 | 		super(localMachineId, commandLine, fileSystem, machineConfig, graphPartition, vertexFactory,
 39 | 			graphSize, outgoingBufferSizes, outputFileName, messageSenderAndReceiverFactory,
 40 | 			incomingMessageStorage, pollingTime, maxMessagesToTransmitConcurrently,
 41 | 			numVerticesFrequencyToCheckOutgoingBuffers,
 42 | 			sleepTimeWhenOutgoingBuffersExceedThreshold, largeVertexPartitioningOutdegreeThreshold,
 43 | 			runPartitioningSuperstep, combine, messageRepresentativeInstance,
 44 | 			representativeEdgeInstance, jobConfiguration, numProcessorsForHandlingIO);
 45 | 		this.benefitThreshold = benefitThreshold;
 46 | 		this.edgeThreshold = edgeThreshold;
 47 | 		machineCommunicationHistogram = new int[getNumWorkers()];
 48 | //		incomingMessageStorage.setMachineCommunicationHistogram(machineCommunicationHistogram);
 49 | 		fasterMachines = new boolean[getNumWorkers()];
 50 | 		this.superstepNoToStopDynamism = superstepNoToStopDynamism;
 51 | 	}
 52 | 
 53 | 	@Override
 54 | 	protected void doExtraWorkBeforeVertexComputation() {
 55 | 		if (currentSuperstepNo > superstepNoToStopDynamism) {
 56 | 			return;
 57 | 		}
 58 | 		machineCommunicationHistogram = new int[getNumWorkers()];
 59 | //		System.out.println("Starting to dump machineCommunicationHistogram...");
 60 | //		for (int i = 0; i < getNumWorkers(); ++i) {
 61 | //			getLogger().info("" + machineCommunicationHistogram[i]);
 62 | //		}
 63 | //		System.out.println("End of dumping machineCommunicationHistogram...");
 64 | //		for (int i = 0; i < getNumWorkers(); ++i) {
 65 | //			machineCommunicationHistogram[i] = 0;
 66 | //		}
 67 | 	}
 68 | //
 69 | //	protected Integer putVertexIntoVerticesToMoveIfMaxCommunicationMachineIsNotLocalMachine(
 70 | //		int nodeId, Map<Integer, Integer> vertexIdMachineIdMap) {
 71 | //		int maxCommunicationMachineId = findIdOfMaxCommunicatedMachine();
 72 | //		if (maxCommunicationMachineId != getLocalMachineId()
 73 | //			&& machineCommunicationHistogram[maxCommunicationMachineId]
 74 | //			  >= (machineCommunicationHistogram[getLocalMachineId()] + benefitThreshold)) {
 75 | //			vertexIdMachineIdMap.put(nodeId, maxCommunicationMachineId);
 76 | //			return maxCommunicationMachineId;
 77 | //		} else {
 78 | //			return null;
 79 | //		}
 80 | //	}
 81 | 
 82 | 	protected int findIdOfMaxCommunicatedMachine() {
 83 | //		System.out.println("Finding maxCommunicationMachine...");
 84 | //		System.out.println("0: " + machineCommunicationHistogram[0]);
 85 | 		int maxIndex = 0;
 86 | 		int maxValue = machineCommunicationHistogram[0];
 87 | 		int numEqualMachines = 1;
 88 | 		for (int i = 1; i < machineCommunicationHistogram.length; ++i) {
 89 | 			int valueOfCurrentMachine = machineCommunicationHistogram[i];
 90 | //			System.out.println(i + ": " + machineCommunicationHistogram[i]);
 91 | 			if (valueOfCurrentMachine > maxValue) {
 92 | 				maxValue = valueOfCurrentMachine;
 93 | 				maxIndex = i;
 94 | 				numEqualMachines = 1;
 95 | 			} else if (valueOfCurrentMachine == maxValue) {
 96 | 				numEqualMachines++;
 97 | 				if (Math.random() <= ((double) 1.0 / (double) numEqualMachines)) {
 98 | 					maxIndex = i;
 99 | 				}
100 | 			}
101 | 		}
102 | //		System.out.println("End of finding maxCommunicationMachine...");
103 | 		return (int) maxIndex;
104 | 	}
105 | }


--------------------------------------------------------------------------------
/gps-rev-110/src/java/gps/writable/LongArrayWritable.java:
--------------------------------------------------------------------------------
 1 | package gps.writable;
 2 | 
 3 | import java.util.Arrays;
 4 | import org.apache.mina.core.buffer.IoBuffer;
 5 | 
 6 | public class LongArrayWritable extends MinaWritable {
 7 | 
 8 |   public long[] value;
 9 | 
10 |   public LongArrayWritable() {
11 |     this.value = new long[0];
12 |   }
13 | 
14 |   public LongArrayWritable(long[] value) {
15 |     this.value = value;
16 |   }
17 | 
18 |   /**
19 |    * Setter that does not deep copy.
20 |    *
21 |    * @param value Array.
22 |    */
23 |   public void set(long[] value) { this.value = value; }
24 | 
25 |   /**
26 |    * Getter.
27 |    *
28 |    * @return Array.
29 |    */
30 |   public long[] get() { return value; }
31 | 
32 | 
33 |   @Override
34 |   public int numBytes() {
35 |     return 4 + 8*value.length;
36 |   }
37 | 
38 |   @Override
39 |   public void write(IoBuffer ioBuffer) {
40 |     ioBuffer.putInt(value.length);
41 | 
42 |     for (long longValue : value) {
43 |       ioBuffer.putLong(longValue);
44 |     }
45 |   }
46 | 
47 |   @Override
48 |   public void read(IoBuffer ioBuffer) {
49 |     int length = ioBuffer.getInt();
50 |     this.value = new long[length];
51 | 
52 |     for (int i = 0; i < length; ++i) {
53 |       this.value[i] = ioBuffer.getLong();
54 |     }
55 |   }
56 | 
57 |   @Override
58 |   public int read(byte[] byteArray, int index) {
59 |     int length = readIntegerFromByteArray(byteArray, index);
60 | 
61 |     this.value = new long[length];
62 |     index += 4;
63 | 
64 |     for (int i = 0; i < length; ++i) {
65 |       this.value[i] = readLongFromByteArray(byteArray, index);
66 |       index += 8;
67 |     }
68 | 
69 |     return 4 + (8*length);
70 |   }
71 | 
72 |   @Override
73 |   public int read(IoBuffer ioBuffer, byte[] byteArray, int index) {
74 |     int length = ioBuffer.getInt();
75 |     writeIntegerToByteArray(byteArray, length, index);
76 |     index += 4;
77 | 
78 |     for (int i = 0; i < length; ++i) {
79 |       ioBuffer.get(byteArray, index, 8);
80 |       index += 8;
81 |     }
82 | 
83 |     return 4 + (8*length);
84 |   }
85 | 
86 |   @Override
87 |   public void combine(byte[] messageQueue, byte[] tmpArray) {
88 |     // Nothing to do. This writable is not combinable.
89 |   }
90 | 
91 |   @Override
92 |   public String toString() {
93 |     return Arrays.toString(value);
94 |   }
95 | }
96 | 


--------------------------------------------------------------------------------
/results/plots/constants.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | ###############
 4 | # Constants
 5 | ###############
 6 | BYTE_PER_GB = 1024*1024*1024.0
 7 | KB_PER_GB = 1024*1024.0
 8 | MB_PER_GB = 1024.0
 9 | 
10 | MS_PER_SEC = 1000.0
11 | SEC_PER_MIN = 60.0
12 | 
13 | ALGS = ('pagerank', 'sssp', 'wcc', 'mst')
14 | ALG_PR, ALG_SSSP, ALG_WCC, ALG_MST = ALGS
15 | ALG_PREMIZAN = 'premizan'
16 | 
17 | GRAPHS = ('livejournal', 'orkut', 'arabic', 'twitter', 'uk0705')
18 | GRAPH_LJ, GRAPH_OR, GRAPH_AR, GRAPH_TW, GRAPH_UK = GRAPHS
19 | 
20 | MACHINES = ('16', '32', '64', '128')
21 | 
22 | SYSTEMS = ('giraph', 'gps', 'mizan', 'graphlab')
23 | SYS_GIRAPH, SYS_GPS, SYS_MIZAN, SYS_GRAPHLAB = SYSTEMS
24 | 
25 | SYS_MODES = (('0','1'),      # Giraph: byte array, hash map
26 |              ('0','1','2'),  # GPS: none, LALP, dynamic
27 |              ('0',),         # Mizan: static
28 |              ('0','1'))      # GraphLab: sync, async
29 | SYSMODE_HASH = '1'           # premizan hash partitioning
30 | 
31 | # combination of all systems and their sys modes
32 | ALL_SYS = [(system, sysmode)
33 |             for system, sysmodes in zip(SYSTEMS, SYS_MODES)
34 |             for sysmode in sysmodes]
35 | 
36 | 
37 | # conversion modes
38 | MODES = (0, 1, 2)
39 | MODE_TIME, MODE_MEM, MODE_NET = MODES
40 | 
41 | # names for relevant statistics (indexed by "mode")
42 | STATS = (('run', 'io', 'tot'),                  # time
43 |          ('mem_min', 'mem_max', 'mem_avg'),     # memory
44 |          ('recv_min', 'recv_max', 'recv_avg',   # net
45 |           'sent_min', 'sent_max', 'sent_avg'))
46 | 


--------------------------------------------------------------------------------
/results/plots/gen-all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | 
3 | ./gen-data.py 0 > data_time.py
4 | ./gen-data.py 1 > data_mem.py
5 | ./gen-data.py 2 > data_net.py
6 | 
7 | ./gen-data.py 1 --master > data_mem_master.py
8 | ./gen-data.py 2 --master > data_net_master.py


--------------------------------------------------------------------------------
/results/plots/plot-all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | ./plot.py 0 --save-eps
 4 | ./plot.py 0 --save-eps --total-time
 5 | ./plot.py 1 --save-eps
 6 | ./plot.py 1 --save-eps --plot-sum
 7 | ./plot.py 2 --save-eps --plot-sum
 8 | ./plot.py 2 --save-eps
 9 | 
10 | ./plot-with-cuts.py 0 --save-eps
11 | ./plot-with-cuts.py 2 --save-eps
12 | 
13 | ./plot.py 1 --master --save-eps
14 | ./plot.py 2 --master --save-eps
15 | 
16 | ./plot.py 1 --premizan --save-eps
17 | ./plot.py 2 --premizan --save-eps --plot-sum
18 | ./plot.py 2 --premizan --save-eps
19 | ./plot.py 1 --premizan --master --save-eps
20 | ./plot.py 2 --premizan --master --save-eps


--------------------------------------------------------------------------------
/results/plots/plot-paper.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | 
3 | ./plot.py 0 --save-paper
4 | ./plot.py 1 --save-paper --plot-max
5 | ./plot.py 2 --save-paper --plot-sum
6 | 
7 | ./plot-with-cuts.py 0 --save-paper
8 | ./plot-with-cuts.py 2 --save-paper


--------------------------------------------------------------------------------