├── .DS_Store
├── README.md
└── hadoop
    ├── .DS_Store
    ├── target
        ├── maven-status
        │   └── maven-compiler-plugin
        │   │   ├── testCompile
        │   │       └── default-testCompile
        │   │       │   ├── createdFiles.lst
        │   │       │   └── inputFiles.lst
        │   │   └── compile
        │   │       └── default-compile
        │   │           ├── createdFiles.lst
        │   │           └── inputFiles.lst
        ├── HW3-1.0-SNAPSHOT.jar
        ├── test-classes
        │   └── HW3
        │   │   └── AppTest.class
        ├── maven-archiver
        │   └── pom.properties
        └── surefire-reports
        │   ├── HW3.AppTest.txt
        │   └── TEST-HW3.AppTest.xml
    ├── src
        └── main
        │   └── java
        │       ├── parser
        │           ├── Parser.java
        │           └── ParserImpl.java
        │       ├── enums
        │           └── PageRankEnums.java
        │       ├── topk
        │           ├── TopK.java
        │           ├── TopKReducer.java
        │           └── TopKMapper.java
        │       ├── driver
        │           └── DriverProgram.java
        │       ├── pagerank
        │           ├── PageRankImpl.java
        │           ├── PageRankReducer.java
        │           └── PageRankMapper.java
        │       ├── model
        │           └── Node.java
        │       └── parserjob
        │           └── ParserJob.java
    ├── Readme.txt
    ├── pom.xml
    ├── Makefile
    └── HW3.iml


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manthanthakker/hadoop-page-rank/master/.DS_Store


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PageRankMapReduce
2 | PageRank Implementation for Map Reduce in Hadoop and Apache spark
3 | 


--------------------------------------------------------------------------------
/hadoop/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manthanthakker/hadoop-page-rank/master/hadoop/.DS_Store


--------------------------------------------------------------------------------
/hadoop/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst:
--------------------------------------------------------------------------------
1 | HW3/AppTest.class
2 | 


--------------------------------------------------------------------------------
/hadoop/target/HW3-1.0-SNAPSHOT.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manthanthakker/hadoop-page-rank/master/hadoop/target/HW3-1.0-SNAPSHOT.jar


--------------------------------------------------------------------------------
/hadoop/target/test-classes/HW3/AppTest.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manthanthakker/hadoop-page-rank/master/hadoop/target/test-classes/HW3/AppTest.class


--------------------------------------------------------------------------------
/hadoop/target/maven-archiver/pom.properties:
--------------------------------------------------------------------------------
1 | #Generated by Maven
2 | #Sun Feb 25 13:59:30 EST 2018
3 | version=1.0-SNAPSHOT
4 | groupId=HW3
5 | artifactId=HW3
6 | 


--------------------------------------------------------------------------------
/hadoop/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst:
--------------------------------------------------------------------------------
1 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/test/java/HW3/AppTest.java
2 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/parser/Parser.java:
--------------------------------------------------------------------------------
 1 | package parser;
 2 | 
 3 | /**
 4 |  * @author Manthan Thakker
 5 |  * @project HW3
 6 |  * @date 2/19/18
 7 |  * @email thakker.m@husky.neu.edu
 8 |  */
 9 | public interface Parser {
10 | }
11 | 


--------------------------------------------------------------------------------
/hadoop/target/surefire-reports/HW3.AppTest.txt:
--------------------------------------------------------------------------------
1 | -------------------------------------------------------------------------------
2 | Test set: HW3.AppTest
3 | -------------------------------------------------------------------------------
4 | Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.011 sec
5 | 


--------------------------------------------------------------------------------
/hadoop/Readme.txt:
--------------------------------------------------------------------------------
 1 | 3 simple steps to run the Job (Used Joes file):
 2 | 
 3 | 1. Open the Makefile configure
 4 | 
 5 | local.input=  ### MENTION INPUT PATH LOCATION
 6 | local.output=### MENTION OUTPUT PATH LOCATION
 7 | 
 8 | 2. Type make alone on the terminal
 9 | 
10 | 3. You can see the output folder topkresults in the folder.
11 | 
12 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/enums/PageRankEnums.java:
--------------------------------------------------------------------------------
 1 | package enums;
 2 | 
 3 | /**
 4 |  * @author Manthan Thakker
 5 |  * @project HW3
 6 |  * @date 2/23/18
 7 |  * @email thakker.m@husky.neu.edu
 8 |  */
 9 | 
10 | /**
11 |  * Global Counters across mao reduce program
12 |  */
13 | public enum PageRankEnums {
14 |      UNIQUEPAGES,
15 |      DANGLINGNODESNEW,
16 |      K
17 | }
18 | 


--------------------------------------------------------------------------------
/hadoop/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst:
--------------------------------------------------------------------------------
 1 | parserjob/ParserJob$ParserMapper.class
 2 | model/Value.class
 3 | HW3/App.class
 4 | parser/ParserImpl$WikiParser.class
 5 | parser/Parser.class
 6 | topk/TopKMapper$1.class
 7 | pagerank/PageRankImpl.class
 8 | parserjob/ParserJob$ParserMapper$WikiParser.class
 9 | parser/ParserImpl.class
10 | topk/TopKReducer$1.class
11 | pagerank/PageRankMapper.class
12 | Enums/PageRankEnums.class
13 | driver/DriverProgram.class
14 | pagerank/PageRankReducer.class
15 | pagerank/PageRank.class
16 | topk/TopKReducer.class
17 | parserjob/ParserJob.class
18 | topk/TopKMapper.class
19 | topk/TopK.class
20 | 


--------------------------------------------------------------------------------
/hadoop/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst:
--------------------------------------------------------------------------------
 1 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/pagerank/PageRank.java
 2 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/pagerank/PageRankMapper.java
 3 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/topk/TopKMapper.java
 4 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/pagerank/PageRankReducer.java
 5 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/parserjob/ParserJob.java
 6 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/driver/DriverProgram.java
 7 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/parser/Parser.java
 8 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/topk/TopKReducer.java
 9 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/topk/TopK.java
10 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/model/Value.java
11 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/parser/ParserImpl.java
12 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/Enums/PageRankEnums.java
13 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/HW3/App.java
14 | /Users/trailbrazer/Desktop/MR/git/MR/MR/HW3/src/main/java/pagerank/PageRankImpl.java
15 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/topk/TopK.java:
--------------------------------------------------------------------------------
 1 | package topk;
 2 | 
 3 | import model.Node;
 4 | 
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.fs.Path;
 7 | import org.apache.hadoop.io.NullWritable;
 8 | import org.apache.hadoop.io.Text;
 9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | 
13 | /**
14 |  * @author Manthan Thakker
15 |  * @project HW3
16 |  * @date 2/23/18
17 |  * @email thakker.m@husky.neu.edu
18 |  */
19 | public class TopK {
20 | 
21 | 
22 |     public static void main(String[] args) throws Exception {
23 | 
24 |         Configuration conf = new Configuration();
25 |         Job job = Job.getInstance(conf, "Top K");
26 |         job.getConfiguration().set("UNIQUEPAGES", args[2]);
27 | 
28 |         job.getConfiguration().set("K",args[3]);
29 |         // Setup
30 |         job.setJarByClass(TopK.class);
31 |         job.setMapperClass(TopKMapper.class);
32 | 
33 |         //Mapper
34 |         job.setMapOutputKeyClass(NullWritable.class);
35 |         job.setMapOutputValueClass(Node.class);
36 | 
37 |         job.setReducerClass(TopKReducer.class);
38 |         //Reducer
39 |         job.setOutputKeyClass(Text.class);
40 |         job.setOutputValueClass(Text.class);
41 | 
42 |         FileInputFormat.addInputPath(job, new Path(args[0]));
43 |         FileOutputFormat.setOutputPath(job, new Path(args[1]));
44 |         System.exit(job.waitForCompletion(true) ? 0 : 1);
45 |     }
46 | 
47 | }
48 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/driver/DriverProgram.java:
--------------------------------------------------------------------------------
 1 | package driver;
 2 | 
 3 | import pagerank.PageRankImpl;
 4 | import parserjob.ParserJob;
 5 | import topk.TopK;
 6 | 
 7 | import java.io.FileInputStream;
 8 | import java.io.InputStream;
 9 | import java.util.Properties;
10 | 
11 | /**
12 |  * @author Manthan Thakker
13 |  * @project HW3
14 |  * @date 2/22/18
15 |  * @email thakker.m@husky.neu.edu
16 |  */
17 | public class DriverProgram {
18 | 
19 |     /**
20 |      * Initiates the execution
21 |      * @param args: The input and the ouput paths
22 |      * @throws Exception
23 |      */
24 |     public static void main(String args[]) throws Exception {
25 | 
26 |         // Phase 1
27 |         final String dataSetInput;
28 |         final String dataSetOutput;
29 | 
30 |         // Phase 2
31 |         final String pageRankInput;
32 |         final String pageRankOutput;
33 | 
34 |         // Phase 3
35 |         long UNIQUEPAGES;
36 |         final String topKInput;
37 |         final String topKoutput;
38 | 
39 |         long K=10;
40 | 
41 | 
42 |         topKInput = args[1] + "/10";
43 |         topKoutput = args[1] + "/output";
44 | 
45 |         String commandLine[] = new String[4];
46 |         commandLine[0] = args[0];
47 |         commandLine[1] = args[1];
48 |         UNIQUEPAGES = ParserJob.main(commandLine);
49 | 
50 |         commandLine[0] = args[1];
51 |         commandLine[2] = UNIQUEPAGES + "";
52 |         PageRankImpl.main(commandLine);
53 | 
54 |         commandLine[0] = topKInput;
55 |         commandLine[1] = topKoutput;
56 |         commandLine[2] = UNIQUEPAGES + "";
57 |         commandLine[3]=K+"";
58 |         TopK.main(commandLine);
59 | 
60 |     }
61 | 
62 | 
63 | }
64 | 


--------------------------------------------------------------------------------
/hadoop/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 3 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 4 |     <modelVersion>4.0.0</modelVersion>
 5 | 
 6 |     <groupId>HW3</groupId>
 7 |     <artifactId>HW3</artifactId>
 8 |     <version>1.0-SNAPSHOT</version>
 9 |     <packaging>jar</packaging>
10 | 
11 |     <name>HW3</name>
12 |     <url>http://maven.apache.org</url>
13 | 
14 |     <properties>
15 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
16 |     </properties>
17 | 
18 |     <dependencies>
19 | 
20 |         <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
21 |         <dependency>
22 |             <groupId>org.apache.hadoop</groupId>
23 |             <artifactId>hadoop-common</artifactId>
24 |             <version>2.7.3</version>
25 |             <scope>provided</scope>
26 |         </dependency>
27 | 
28 |         <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
29 |         <dependency>
30 |             <groupId>org.apache.hadoop</groupId>
31 |             <artifactId>hadoop-mapreduce-client-core</artifactId>
32 |             <version>3.0.0</version>
33 |         </dependency>
34 | 
35 |         <dependency>
36 |             <groupId>org.apache.hadoop</groupId>
37 |             <artifactId>hadoop-client</artifactId>
38 |             <version>2.2.0</version>
39 |         </dependency>
40 | 
41 |         <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-core -->
42 |         <dependency>
43 |             <groupId>org.apache.hadoop</groupId>
44 |             <artifactId>hadoop-core</artifactId>
45 |             <version>1.2.1</version>
46 |         </dependency>
47 | 
48 | 
49 |         <dependency>
50 |             <groupId>junit</groupId>
51 |             <artifactId>junit</artifactId>
52 |             <version>3.8.1</version>
53 |             <scope>test</scope>
54 |         </dependency>
55 | 
56 |     </dependencies>
57 | </project>
58 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/pagerank/PageRankImpl.java:
--------------------------------------------------------------------------------
 1 | package pagerank;
 2 | 
 3 | import enums.PageRankEnums;
 4 | import model.Node;
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.fs.Path;
 7 | import org.apache.hadoop.io.Text;
 8 | import org.apache.hadoop.mapreduce.Job;
 9 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
10 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
11 | 
12 | 
13 | /**
14 |  * @author Manthan Thakker
15 |  * @project HW3
16 |  * @date 2/20/18
17 |  * @email thakker.m@husky.neu.edu
18 |  */
19 | public class PageRankImpl {
20 | 
21 | 
22 |     public static void main(String[] args) throws Exception {
23 | 
24 |         long deltaNew = 0l;
25 | 
26 | 
27 |         for (int i = 1; i < 11; i++) {
28 | 
29 |             Configuration conf = new Configuration();
30 | 
31 |             Job job = Job.getInstance(conf, "Page Rank");
32 | 
33 | 
34 |             job.setJarByClass(PageRankImpl.class);
35 |             // SETTING CONTEXT VARIABLES
36 |             job.getConfiguration().set("deltaOld", deltaNew + "");
37 |             job.getConfiguration().set("UNIQUEPAGES", args[2]);
38 | 
39 | 
40 |             // Mapper
41 |             job.setMapperClass(PageRankMapper.class);
42 |             job.setMapOutputKeyClass(Text.class);
43 |             job.setMapOutputValueClass(Node.class);
44 |             job.setReducerClass(PageRankReducer.class);
45 | 
46 |             //Reducer
47 |             job.setOutputKeyClass(Text.class);
48 |             job.setOutputValueClass(Text.class);
49 |             FileInputFormat.addInputPath(job, new Path(args[0] + "/" + (i - 1) ));
50 |             FileOutputFormat.setOutputPath(job, new Path(args[0] + "/" + (i) ));
51 |             job.waitForCompletion(true);
52 | 
53 |             //Getting the number of nodes
54 |             job.getConfiguration().setLong("numberOfNodes", 18000);
55 |             job.getConfiguration().setBoolean("iterate", true);
56 | 
57 |             // Assigning the dangling node value to old value to use in the next iteration
58 |             deltaNew = job.getCounters().findCounter(PageRankEnums.DANGLINGNODESNEW).getValue();
59 |             // initializiong the new delata dangling node to 0
60 |             job.getCounters().findCounter(PageRankEnums.DANGLINGNODESNEW).setValue(0l);
61 | 
62 |         }
63 |     }
64 | 
65 | }
66 | 
67 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/model/Node.java:
--------------------------------------------------------------------------------
 1 | package model;
 2 | 
 3 | import org.apache.hadoop.io.WritableComparable;
 4 | 
 5 | import java.io.DataInput;
 6 | import java.io.DataOutput;
 7 | import java.io.IOException;
 8 | import java.util.ArrayList;
 9 | import java.util.List;
10 | 
11 | /**
12 |  * @author Manthan Thakker
13 |  * @project HW3
14 |  * @date 2/20/18
15 |  * @email thakker.m@husky.neu.edu
16 |  */
17 | public class Node implements WritableComparable {
18 | 
19 |     public String id="DEFAULT";
20 |     public Double pageRank = -1.0;
21 |     public List<String> neighbors;
22 |     public boolean isNode=false;
23 |     public static final long SCALEUP = 1000000000l;
24 | 
25 |     public Node() {
26 |         this.id=id;
27 |         neighbors = new ArrayList<String>();
28 |         isNode = true;
29 |     }
30 | 
31 |     public Node(String id) {
32 |         this.id = id.trim();
33 |         this.pageRank = pageRank;
34 |         neighbors = new ArrayList<String>();
35 |         isNode = true;
36 |     }
37 | 
38 |     public Node(String id, Double pageRank) {
39 |         this.id = id.trim();
40 |         this.pageRank = pageRank;
41 |         neighbors = new ArrayList<String>();
42 |         isNode = false;
43 |     }
44 | 
45 |     // SERILIZATION AND DESERILIZATION METHODS
46 | 
47 |     public void write(DataOutput dataOutput) throws IOException {
48 | 
49 | 
50 |         dataOutput.writeUTF(id.trim());
51 |         dataOutput.writeBoolean(isNode);
52 |         dataOutput.writeDouble(pageRank);
53 |         String accumulate = "";
54 |         for (String neighbor : neighbors)
55 |             accumulate += neighbor.trim() + ",";
56 |         if (accumulate.length() > 0)
57 |             dataOutput.writeUTF(accumulate.substring(0, accumulate.length()));
58 |         else {
59 |             dataOutput.writeUTF(accumulate);
60 |         }
61 | 
62 |     }
63 | 
64 | 
65 | 
66 |     public void readFields(DataInput dataInput) throws IOException {
67 | 
68 |         id = dataInput.readUTF().trim();
69 |         isNode = dataInput.readBoolean();
70 |         pageRank = dataInput.readDouble();
71 |         neighbors = new ArrayList<String>();
72 |         String nei = dataInput.readUTF();
73 | 
74 |         String neighborsName[] = nei.split(",");
75 |         for (String neighbor : neighborsName) {
76 |             neighbors.add(neighbor.trim());
77 |         }
78 | 
79 | 
80 |     }
81 | 
82 |     @Override
83 |     public String toString() {
84 |         return "#" + pageRank + "#" + neighbors + "#" + isNode;
85 |     }
86 | 
87 | 
88 |     public int compareTo(Object o) {
89 |         return pageRank.compareTo(((Node)o).pageRank);
90 |     }
91 | }
92 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/pagerank/PageRankReducer.java:
--------------------------------------------------------------------------------
 1 | package pagerank;
 2 | 
 3 | import enums.PageRankEnums;
 4 | import model.Node;
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.io.Text;
 7 | import org.apache.hadoop.mapreduce.Reducer;
 8 | 
 9 | import java.io.IOException;
10 | import java.util.Iterator;
11 | 
12 | /**
13 |  * @author Manthan Thakker
14 |  * @project HW3
15 |  * @date 2/23/18
16 |  * @email thakker.m@husky.neu.edu
17 |  */
18 | public class PageRankReducer extends Reducer<Text, Node, Text, Text> {
19 | 
20 |     Long numberOfNodes;
21 |     Configuration configuration;
22 |     private final long SCALE_FACTOR = 1000000000000l;
23 | 
24 |     /**
25 |      * Initializes the state variables
26 |      * @param context
27 |      */
28 |     public void setup(Context context) {
29 |         configuration = context.getConfiguration();
30 |         numberOfNodes = Long.parseLong(context.getConfiguration().get("UNIQUEPAGES"));
31 |     }
32 | 
33 |     /**
34 |      *
35 |      * @param key: The Node id
36 |      * @param values: List of Nodes/Pagerank Contributions(isNode will be false)
37 |      * @param context: Context
38 |      * @throws IOException
39 |      * @throws InterruptedException
40 |      * All partial pagerank contributions for same nodeId will be routed to the same reduce call.
41 |      * A copy of the Node itself will be routed to the same reduce call.
42 |      * We add up all the partial contributions and then emit the new node with the updated pageRank.
43 |      */
44 |     public void reduce(Text key, Iterable<Node> values, Context context) throws IOException, InterruptedException {
45 | 
46 |         // Intializing variables:
47 |         Double pageRankTotal = 0.0;
48 |         Iterator<Node> iterable = values.iterator();
49 |         Node M = null;
50 | 
51 |         // Preparing the string to be outputed
52 |         String MString = "";
53 | 
54 |         while (iterable.hasNext()) {
55 |             Node node = iterable.next();
56 |             // If its a pagerank contribution or Actual Node.
57 |             if (node.isNode) {
58 |                 M = node;
59 |                 MString = "#" + M.neighbors + "#" + M.isNode;
60 |             } else {
61 |                 pageRankTotal += node.pageRank;
62 |             }
63 |         }
64 | 
65 |         // The pagerank formula
66 |         pageRankTotal = (0.15 / (numberOfNodes)) + (0.85 * pageRankTotal);
67 | 
68 |         if (M != null) {
69 |             context.write(new Text(M.id.trim()), new Text("#" + pageRankTotal + MString));
70 |         } else {
71 |             long pageRank = Double.valueOf(pageRankTotal * SCALE_FACTOR).longValue();
72 |             context.getCounter(PageRankEnums.DANGLINGNODESNEW).increment(pageRank);
73 |             context.write(key, new Text("#" + pageRankTotal + "#[]#true"));
74 |         }
75 |     }
76 | 
77 | 
78 | }
79 | 
80 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/topk/TopKReducer.java:
--------------------------------------------------------------------------------
 1 | package topk;
 2 | 
 3 | import enums.PageRankEnums;
 4 | import model.Node;
 5 | import org.apache.hadoop.io.NullWritable;
 6 | import org.apache.hadoop.io.Text;
 7 | import org.apache.hadoop.mapreduce.Reducer;
 8 | 
 9 | import java.io.IOException;
10 | import java.util.*;
11 | 
12 | /**
13 |  * @author Manthan Thakker
14 |  * @project HW3
15 |  * @date 2/23/18
16 |  * @email thakker.m@husky.neu.edu
17 |  */
18 | public class TopKReducer extends Reducer<NullWritable, Node, Text, Text> {
19 |     private Map<String, Node> pages;
20 |     private long topK;
21 | 
22 |     /**
23 |      * Initialzes all the variables
24 |      *
25 |      * @param context
26 |      */
27 |     public void setup(Context context) {
28 |         pages = new HashMap<String, Node>();
29 |         this.topK = Long.parseLong(context.getConfiguration().get("K"));
30 |     }
31 | 
32 | 
33 |     /**
34 |      * @param key:          The Index of the Line
35 |      * @param nodeIterator: List of Nodes
36 |      * @param context       As we know all records will route to the same reduce call.
37 |      *                      Inserts each node and then just sorts and emits the top k results.
38 |      */
39 |     public void reduce(NullWritable key, Iterable<Node> nodeIterator, Context context) throws IOException, InterruptedException {
40 |         Iterator<Node> iterator = nodeIterator.iterator();
41 |         while (iterator.hasNext()) {
42 |             Node node = iterator.next();
43 | 
44 |             pages.put(node.pageRank + "#" + node.id, node);
45 |         }
46 |         pages = sortByComparator(pages, false);
47 |         int i = 0;
48 |         for (String page : pages.keySet()) {
49 |             context.write(new Text(""), new Text(page));
50 |             i++;
51 |             if (i > topK)
52 |                 break;
53 |         }
54 |     }
55 | 
56 |     /**
57 |      * Sorts the given Unsorted Map by PageRank Values
58 |      *
59 |      * @param unsortMap : The map to be sorted
60 |      * @param order:    False to be ascending
61 |      * @return Sorted Map Order
62 |      * Picked up sorting from the IR Project done in last semester.
63 |      */
64 |     private static Map<String, Node> sortByComparator(Map<String, Node> unsortMap, final boolean order) {
65 | 
66 |         List<Map.Entry<String, Node>> list = new LinkedList<Map.Entry<String, Node>>(unsortMap.entrySet());
67 | 
68 |         // Sorting the list based on values
69 |         Collections.sort(list, new Comparator<Map.Entry<String, Node>>() {
70 |             public int compare(Map.Entry<String, Node> o1,
71 |                                Map.Entry<String, Node> o2) {
72 |                 if (order) {
73 |                     return o1.getValue().pageRank.compareTo(o2.getValue().pageRank);
74 |                 } else {
75 |                     return (int) (((Double.parseDouble(o2.getKey().split("#")[0]) * 10000000000.0) - (Double.parseDouble(o1.getKey().split("#")[0])) * 10000000000.0) * 10000.0);
76 | 
77 |                 }
78 |             }
79 |         });
80 | 
81 |         // Maintaining insertion order with the help of LinkedList
82 |         Map<String, Node> sortedMap = new LinkedHashMap<String, Node>();
83 |         for (Map.Entry<String, Node> entry : list) {
84 |             sortedMap.put(entry.getKey(), entry.getValue());
85 |         }
86 | 
87 |         return sortedMap;
88 |     }
89 | }


--------------------------------------------------------------------------------
/hadoop/src/main/java/pagerank/PageRankMapper.java:
--------------------------------------------------------------------------------
  1 | package pagerank;
  2 | 
  3 | import enums.PageRankEnums;
  4 | import model.Node;
  5 | import org.apache.hadoop.conf.Configuration;
  6 | import org.apache.hadoop.io.Text;
  7 | import org.apache.hadoop.mapreduce.Mapper;
  8 | 
  9 | import java.io.IOException;
 10 | import java.util.Arrays;
 11 | 
 12 | /**
 13 |  * @author Manthan Thakker
 14 |  * @project HW3
 15 |  * @date 2/23/18
 16 |  * @email thakker.m@husky.neu.edu
 17 |  */
 18 | 
 19 | public class PageRankMapper extends Mapper<Object, Text, Text, Node> {
 20 | 
 21 |     double deltaOld = 0.0;
 22 |     Long numberOfNodes;
 23 |     Configuration configuration;
 24 |     private long SCALE_FACTOR = 10000000000000000l;
 25 | 
 26 | 
 27 |     /**
 28 |      * Initializes the variables from context
 29 |      * @param context
 30 |      */
 31 |     public void setup(Context context) {
 32 | 
 33 |         configuration = context.getConfiguration();
 34 |         deltaOld = Long.parseLong(context.getConfiguration().get("deltaOld"))*1.0 / (SCALE_FACTOR);
 35 |         numberOfNodes = Long.parseLong(context.getConfiguration().get("UNIQUEPAGES"));
 36 |     }
 37 | 
 38 | 
 39 |     /**
 40 |      *
 41 |      * @param nodeId: The Id of the Node
 42 |      * @param record: the record which contains the string representation of the node
 43 |      * @param context
 44 |      * @throws IOException
 45 |      * @throws InterruptedException
 46 |      *
 47 |      * This methods takes in Value as the Node String representation, converts it into object
 48 |      * It emits a copy of the same to the map reduce phase and then emits the page rank contribution for each
 49 |      * of the neighbor nodes.
 50 |      */
 51 |     public void map(Object nodeId, Text record, Context context) throws IOException, InterruptedException {
 52 | 
 53 |         /// Node in String Format Converted to the Node Object
 54 |         Node node = parseRecord(record.toString(), numberOfNodes);
 55 | 
 56 |         // Add contribution from dangling nodes to PageRank
 57 |         node.pageRank += 0.85 * (deltaOld / numberOfNodes);
 58 | 
 59 |         // Pass along the graph
 60 |         context.write(new Text(node.id.trim()), node);
 61 | 
 62 |         // Emit the pageRank contribution to the neighboring nodes
 63 |         Double p = 0.0;
 64 |         if (node.neighbors.size() > 1) {
 65 |             p = node.pageRank / (numberOfNodes);
 66 | 
 67 |             // Contribute Partial PageRank for each of its neighbor
 68 |             for (String n : node.neighbors) {
 69 | 
 70 |                 // Node with just pageRank, isNode Field is set to be false
 71 |                 Node pageRankNode = new Node(n, p);
 72 |                 pageRankNode.id = n.trim();
 73 |                 context.write(new Text(n.trim()), pageRankNode);
 74 | 
 75 |             }
 76 |         } else {
 77 | 
 78 |             // If a node has no neighbours than add to the dangling nodes new
 79 |             double pageRank = ((node.pageRank / (numberOfNodes)));
 80 |             context.getCounter(PageRankEnums.DANGLINGNODESNEW).increment((long) (pageRank * SCALE_FACTOR));
 81 |         }
 82 | 
 83 |     }
 84 | 
 85 |     /**
 86 |      *
 87 |      * @param record: String representation of the node
 88 |      * @param numberOfNodes: The unique page Count
 89 |      * @return Node object of the given String representation
 90 |      */
 91 |     public static Node parseRecord(String record, long numberOfNodes) {
 92 |         Node node = new Node();
 93 |         String fields[] = record.toString().split("#");
 94 | 
 95 |         node.id = fields[0].toString().trim();
 96 |         if (node.pageRank != -1.0)
 97 |             node.pageRank = Double.parseDouble(fields[1]);
 98 |         else
 99 |             node.pageRank = 1.0 / numberOfNodes;
100 |         String neighborsArr[] = fields[2].substring(1, fields[2].length() - 1).split(",");
101 |         node.neighbors = Arrays.asList(neighborsArr);
102 |         node.isNode = true;
103 |         return node;
104 |     }
105 | 
106 | }
107 | 
108 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/topk/TopKMapper.java:
--------------------------------------------------------------------------------
  1 | package topk;
  2 | 
  3 | import enums.PageRankEnums;
  4 | import model.Node;
  5 | import org.apache.hadoop.io.LongWritable;
  6 | import org.apache.hadoop.io.NullWritable;
  7 | import org.apache.hadoop.io.Text;
  8 | import org.apache.hadoop.mapreduce.Mapper;
  9 | 
 10 | import java.io.IOException;
 11 | import java.util.*;
 12 | 
 13 | /**
 14 |  * @author Manthan Thakker
 15 |  * @project HW3
 16 |  * @date 2/23/18
 17 |  * @email thakker.m@husky.neu.edu
 18 |  */
 19 | public class TopKMapper extends Mapper<LongWritable, Text, NullWritable, Node> {
 20 | 
 21 |     private Map<String, Node> pages;
 22 |     private long topK;
 23 | 
 24 | 
 25 |     /**
 26 |      * Iniitializes all tha variables
 27 |      * @param context
 28 |      */
 29 |     public void setup(Context context) {
 30 |         pages = new HashMap<String, Node>();
 31 |         this.topK = Long.parseLong(context.getConfiguration().get("K"));
 32 |     }
 33 | 
 34 | 
 35 |     /**
 36 |      *
 37 |      * @param key: The Index of the Line
 38 |      * @param value: The Node string representation.
 39 |      * @param context
 40 |      * Emits each node.
 41 |      */
 42 |     public void map(LongWritable key, Text value, Context context) {
 43 |         Node node = parseRecord(value.toString());
 44 |         pages.put(node.id, node);
 45 |     }
 46 | 
 47 | 
 48 |     /**
 49 |      * Sorts locally all the collected papges and emits only top k results
 50 |      * @param context
 51 |      * @throws IOException
 52 |      * @throws InterruptedException
 53 |      */
 54 |     public void cleanup(Context context) throws IOException, InterruptedException {
 55 |         pages = sortByComparator(pages, false);
 56 |         int i = 0;
 57 |         for (String page : pages.keySet()) {
 58 | 
 59 |             context.write( NullWritable.get(), pages.get(page));
 60 |             i++;
 61 |             if (i > topK)
 62 |                 break;
 63 |         }
 64 |     }
 65 | 
 66 | 
 67 |     /**
 68 |      *
 69 |      * @param record: String representation of the node
 70 |      * @return Node object of the given String representation
 71 |      */
 72 |     public static Node parseRecord(String record) {
 73 |         Node node = new Node();
 74 |         String fields[] = record.toString().split("#");
 75 | 
 76 | 
 77 |         node.id = fields[0].toString().trim();
 78 |         node.pageRank = Double.parseDouble(fields[1]);
 79 |         String neighborsArr[] = fields[2].substring(1, fields[2].length() - 1).split(",");
 80 | 
 81 |         node.neighbors = Arrays.asList(neighborsArr);
 82 |         node.isNode = true;
 83 | 
 84 |         return node;
 85 |     }
 86 | 
 87 |     /**
 88 |      * Sorts the given Unsorted Map by PageRank Values
 89 |      * @param unsortMap : The map to be sorted
 90 |      * @param order: False to be ascending
 91 |      * @return Sorted Map Order
 92 |      * Picked up sorting from the IR Project done in last semester.
 93 |      */
 94 |     private static Map<String, Node> sortByComparator(Map<String, Node> unsortMap, final boolean order) {
 95 | 
 96 |         List<Map.Entry<String, Node>> list = new LinkedList<Map.Entry<String, Node>>(unsortMap.entrySet());
 97 | 
 98 |         // Sorting the list based on values
 99 |         Collections.sort(list, new Comparator<Map.Entry<String, Node>>() {
100 |             public int compare(Map.Entry<String, Node> o1,
101 |                                Map.Entry<String, Node> o2) {
102 |                 if (order) {
103 |                     return o1.getValue().pageRank.compareTo(o2.getValue().pageRank);
104 |                 } else {
105 |                     return o2.getValue().pageRank.compareTo(o1.getValue().pageRank);
106 | 
107 |                 }
108 |             }
109 |         });
110 | 
111 |         // Maintaining insertion order with the help of LinkedList
112 |         Map<String, Node> sortedMap = new LinkedHashMap<String, Node>();
113 |         for (Map.Entry<String, Node> entry : list) {
114 |             sortedMap.put(entry.getKey(), entry.getValue());
115 |         }
116 | 
117 |         return sortedMap;
118 |     }
119 | }


--------------------------------------------------------------------------------
/hadoop/target/surefire-reports/TEST-HW3.AppTest.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" ?>
 2 | <testsuite tests="1" failures="0" name="HW3.AppTest" time="0.01" errors="0" skipped="0">
 3 |   <properties>
 4 |     <property name="java.runtime.name" value="Java(TM) SE Runtime Environment"/>
 5 |     <property name="sun.boot.library.path" value="/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib"/>
 6 |     <property name="java.vm.version" value="25.161-b12"/>
 7 |     <property name="gopherProxySet" value="false"/>
 8 |     <property name="java.vm.vendor" value="Oracle Corporation"/>
 9 |     <property name="maven.multiModuleProjectDirectory" value="/Users/trailbrazer/Desktop/MR/git/MR/MR/HW3"/>
10 |     <property name="java.vendor.url" value="http://java.oracle.com/"/>
11 |     <property name="path.separator" value=":"/>
12 |     <property name="guice.disable.misplaced.annotation.check" value="true"/>
13 |     <property name="java.vm.name" value="Java HotSpot(TM) 64-Bit Server VM"/>
14 |     <property name="file.encoding.pkg" value="sun.io"/>
15 |     <property name="user.country" value="US"/>
16 |     <property name="sun.java.launcher" value="SUN_STANDARD"/>
17 |     <property name="sun.os.patch.level" value="unknown"/>
18 |     <property name="java.vm.specification.name" value="Java Virtual Machine Specification"/>
19 |     <property name="user.dir" value="/Users/trailbrazer/Desktop/MR/git/MR/MR/HW3"/>
20 |     <property name="java.runtime.version" value="1.8.0_161-b12"/>
21 |     <property name="java.awt.graphicsenv" value="sun.awt.CGraphicsEnvironment"/>
22 |     <property name="java.endorsed.dirs" value="/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/endorsed"/>
23 |     <property name="os.arch" value="x86_64"/>
24 |     <property name="java.io.tmpdir" value="/var/folders/0j/qyqym4jd039gh386lg_6j27w0000gn/T/"/>
25 |     <property name="line.separator" value="
26 | "/>
27 |     <property name="java.vm.specification.vendor" value="Oracle Corporation"/>
28 |     <property name="os.name" value="Mac OS X"/>
29 |     <property name="classworlds.conf" value="/usr/local/Cellar/maven/3.5.2/libexec/bin/m2.conf"/>
30 |     <property name="sun.jnu.encoding" value="UTF-8"/>
31 |     <property name="java.library.path" value="/Users/trailbrazer/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:."/>
32 |     <property name="maven.conf" value="/usr/local/Cellar/maven/3.5.2/libexec/conf"/>
33 |     <property name="java.specification.name" value="Java Platform API Specification"/>
34 |     <property name="java.class.version" value="52.0"/>
35 |     <property name="sun.management.compiler" value="HotSpot 64-Bit Tiered Compilers"/>
36 |     <property name="os.version" value="10.13.3"/>
37 |     <property name="library.jansi.path" value="/usr/local/Cellar/maven/3.5.2/libexec/lib/jansi-native"/>
38 |     <property name="http.nonProxyHosts" value="local|*.local|169.254/16|*.169.254/16"/>
39 |     <property name="user.home" value="/Users/trailbrazer"/>
40 |     <property name="user.timezone" value="America/New_York"/>
41 |     <property name="java.awt.printerjob" value="sun.lwawt.macosx.CPrinterJob"/>
42 |     <property name="java.specification.version" value="1.8"/>
43 |     <property name="file.encoding" value="UTF-8"/>
44 |     <property name="user.name" value="trailbrazer"/>
45 |     <property name="java.class.path" value="/usr/local/Cellar/maven/3.5.2/libexec/boot/plexus-classworlds-2.5.2.jar"/>
46 |     <property name="java.vm.specification.version" value="1.8"/>
47 |     <property name="sun.arch.data.model" value="64"/>
48 |     <property name="java.home" value="/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre"/>
49 |     <property name="sun.java.command" value="org.codehaus.plexus.classworlds.launcher.Launcher clean package"/>
50 |     <property name="java.specification.vendor" value="Oracle Corporation"/>
51 |     <property name="user.language" value="en"/>
52 |     <property name="awt.toolkit" value="sun.lwawt.macosx.LWCToolkit"/>
53 |     <property name="java.vm.info" value="mixed mode"/>
54 |     <property name="java.version" value="1.8.0_161"/>
55 |     <property name="java.ext.dirs" value="/Users/trailbrazer/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java"/>
56 |     <property name="sun.boot.class.path" value="/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/jre/classes"/>
57 |     <property name="java.vendor" value="Oracle Corporation"/>
58 |     <property name="maven.home" value="/usr/local/Cellar/maven/3.5.2/libexec"/>
59 |     <property name="file.separator" value="/"/>
60 |     <property name="java.vendor.url.bug" value="http://bugreport.sun.com/bugreport/"/>
61 |     <property name="sun.cpu.endian" value="little"/>
62 |     <property name="sun.io.unicode.encoding" value="UnicodeBig"/>
63 |     <property name="socksNonProxyHosts" value="local|*.local|169.254/16|*.169.254/16"/>
64 |     <property name="ftp.nonProxyHosts" value="local|*.local|169.254/16|*.169.254/16"/>
65 |     <property name="sun.cpu.isalist" value=""/>
66 |   </properties>
67 |   <testcase classname="HW3.AppTest" name="testApp" time="0.01"/>
68 | </testsuite>


--------------------------------------------------------------------------------
/hadoop/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for MapReduce Page Rank project.
  2 | 
  3 | # Customize these paths for your environment.
  4 | # -----------------------------------------------------------
  5 | hadoop.root=/usr/local/Cellar/hadoop/2.8.2
  6 | jar.name=HW3-1.0-SNAPSHOT.jar
  7 | jar.path=target/${jar.name}
  8 | 
  9 | 
 10 | #######################################################################TA CONFIGURE HERE############
 11 | job.name=driver.DriverProgram
 12 | local.input=  ### MENTION INPUT PATH
 13 | local.output=### MENTION OUTPUT PATH
 14 | #######################################################################TA CONFIGURE  HERE############
 15 | 
 16 | # Pseudo-Cluster Execution
 17 | hdfs.user.name=manthan
 18 | hdfs.input=input1
 19 | hdfs.output=topKResults
 20 | 
 21 | 
 22 | # AWS EMR Execution
 23 | aws.emr.release=emr-5.2.1
 24 | aws.region=us-east-1
 25 | aws.bucket.name=inputdatasetmapreduce
 26 | aws.subnet.id=subnet-612a0f05
 27 | aws.input=input1
 28 | aws.output=output
 29 | aws.log.dir=log
 30 | aws.num.nodes=11
 31 | aws.instance.type=m4.large
 32 | # -----------------------------------------------------------
 33 | 
 34 | # Compiles code and builds jar (with dependencies).
 35 | jar:
 36 | 	mvn clean package
 37 | 
 38 | #Removes local output directory.
 39 | clean-local-output:
 40 | 	cp -vR ${local.output} ${local.input} ; rm -rf ${local.input}.* ; rm -rf ${local.output}
 41 | 
 42 | #clean-local-output:
 43 | #	rm -rf ${local.output}
 44 | 
 45 | 
 46 | 
 47 | 
 48 | 
 49 | # Runs standalone
 50 | # Make sure Hadoop  is set up (in /etc/hadoop files) for standalone operation (not pseudo-cluster).
 51 | # https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleCluster.html#Standalone_Operation
 52 | alone: jar clean-local-output
 53 | 	${hadoop.root}/bin/hadoop jar ${jar.path} ${job.name} ${local.input} ${local.output}
 54 | 
 55 | #alone: jar
 56 | 
 57 | #${hadoop.root}/bin/hadoop jar ${jar.path} ${job.name} ${local.input} ${local.output}
 58 | 
 59 | # Start HDFS
 60 | start-hdfs:
 61 | 	${hadoop.root}/sbin/start-dfs.sh
 62 | 
 63 | # Stop HDFS
 64 | stop-hdfs:
 65 | 	${hadoop.root}/sbin/stop-dfs.sh
 66 | 
 67 | # Start YARN
 68 | start-yarn: stop-yarn
 69 | 	${hadoop.root}/sbin/start-yarn.sh
 70 | 
 71 | # Stop YARN
 72 | stop-yarn:
 73 | 	${hadoop.root}/sbin/stop-yarn.sh
 74 | 
 75 | # Reformats & initializes HDFS.
 76 | format-hdfs: stop-hdfs
 77 | 	rm -rf /tmp/hadoop*
 78 | 	${hadoop.root}/bin/hdfs namenode -format
 79 | 
 80 | # Initializes user & input directories of HDFS.
 81 | init-hdfs: start-hdfs
 82 | 	${hadoop.root}/bin/hdfs dfs -rm -r -f /user
 83 | 	${hadoop.root}/bin/hdfs dfs -mkdir /user
 84 | 	${hadoop.root}/bin/hdfs dfs -mkdir /user/${hdfs.user.name}
 85 | 	${hadoop.root}/bin/hdfs dfs -mkdir /user/${hdfs.user.name}/${hdfs.input}
 86 | 
 87 | # Load data to HDFS
 88 | upload-input-hdfs: start-hdfs
 89 | 	${hadoop.root}/bin/hdfs dfs -put ${local.input}/* /user/${hdfs.user.name}/${hdfs.input}
 90 | 
 91 | # Removes hdfs output directory.
 92 | clean-hdfs-output:
 93 | 	${hadoop.root}/bin/hdfs dfs -rm -r -f ${hdfs.output}*
 94 | 
 95 | # Download output from HDFS to local.
 96 | download-output:
 97 | 	mkdir ${local.output}
 98 | 	${hadoop.root}/bin/hdfs dfs -get ${hdfs.output}/* ${local.output}
 99 | 
100 | # Runs pseudo-clustered (ALL). ONLY RUN THIS ONCE, THEN USE: make pseudoq
101 | # Make sure Hadoop  is set up (in /etc/hadoop files) for pseudo-clustered operation (not standalone).
102 | # https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleCluster.html#Pseudo-Distributed_Operation
103 | pseudo: jar stop-yarn format-hdfs init-hdfs upload-input-hdfs start-yarn clean-local-output
104 | 	${hadoop.root}/bin/hadoop jar ${jar.path} ${job.name} ${hdfs.input} ${hdfs.output}
105 | 	make download-output
106 | 
107 | # Runs pseudo-clustered (quickie).
108 | pseudoq: jar clean-local-output clean-hdfs-output
109 | 	${hadoop.root}/bin/hadoop jar ${jar.path} ${job.name} ${hdfs.input} ${hdfs.output}
110 | 	make download-output
111 | 
112 | # Create S3 bucket.
113 | make-bucket:
114 | 	aws s3 mb s3://${aws.bucket.name}
115 | 
116 | # Upload data to S3 input dir.
117 | upload-input-aws: make-bucket
118 | 	aws s3 sync ${local.input} s3://${aws.bucket.name}/${aws.input}
119 | 
120 | # Delete S3 output dir.
121 | delete-output-aws:
122 | 	aws s3 rm s3://${aws.bucket.name}/ --recursive --exclude "*" --include "${aws.output}*"
123 | 
124 | # Upload application to S3 bucket.
125 | upload-app-aws:
126 | 	aws s3 cp ${jar.path} s3://${aws.bucket.name}
127 | 
128 | # Main EMR launch.
129 | cloud: jar upload-app-aws delete-output-aws
130 | 	aws emr create-cluster \
131 | 		--name "6 Large machines small data set" \
132 | 		--release-label ${aws.emr.release} \
133 | 		--instance-groups '[{"InstanceCount":${aws.num.nodes},"InstanceGroupType":"CORE","InstanceType":"${aws.instance.type}"},{"InstanceCount":1,"InstanceGroupType":"MASTER","InstanceType":"${aws.instance.type}"}]' \
134 | 	    --applications Name=Hadoop \
135 | 	    --steps '[{"Args":["${job.name}","s3://${aws.bucket.name}/${aws.input}","s3://${aws.bucket.name}/${aws.output}"],"Type":"CUSTOM_JAR","Jar":"s3://${aws.bucket.name}/${jar.name}","ActionOnFailure":"TERMINATE_CLUSTER","Name":"Custom JAR"}]' \
136 | 		--log-uri s3://${aws.bucket.name}/${aws.log.dir} \
137 | 		--service-role EMR_DefaultRole \
138 | 		--ec2-attributes InstanceProfile=EMR_EC2_DefaultRole,SubnetId=${aws.subnet.id} \
139 | 		--region ${aws.region} \
140 | 		--enable-debugging \
141 | 		--auto-terminate
142 | 
143 | # Download output from S3.
144 | download-output-aws: clean-local-output
145 | 	mkdir ${local.output}
146 | 	aws s3 sync s3://${aws.bucket.name}/${aws.output} ${local.output}
147 | 
148 | # Change to standalone mode.
149 | switch-standalone:
150 | 	cp config/standalone/*.xml ${hadoop.root}/etc/hadoop
151 | 
152 | # Change to pseudo-cluster mode.
153 | switch-pseudo:
154 | 	cp config/pseudo/*.xml ${hadoop.root}/etc/hadoop
155 | 
156 | # Package for release.
157 | distro:
158 | 	rm -rf build
159 | 	mkdir build
160 | 	mkdir build/deliv
161 | 	mkdir build/deliv/WordCount
162 | 	cp pom.xml build/deliv/WordCount
163 | 	cp -r src build/deliv/WordCount
164 | 	cp Makefile build/deliv/WordCount
165 | 	cp README.txt build/deliv/WordCount
166 | 	tar -czf WordCount.tar.gz -C build/deliv WordCount
167 | 	cd build/deliv && zip -rq ../../WordCount.zip WordCount
168 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/parser/ParserImpl.java:
--------------------------------------------------------------------------------
  1 | package parser;
  2 | 
  3 | import java.io.*;
  4 | import java.net.URLDecoder;
  5 | import java.util.LinkedList;
  6 | import java.util.List;
  7 | import java.util.regex.Matcher;
  8 | import java.util.regex.Pattern;
  9 | 
 10 | import javax.xml.parsers.SAXParser;
 11 | import javax.xml.parsers.SAXParserFactory;
 12 | 
 13 | import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 14 | import org.xml.sax.Attributes;
 15 | import org.xml.sax.InputSource;
 16 | import org.xml.sax.SAXException;
 17 | import org.xml.sax.XMLReader;
 18 | import org.xml.sax.helpers.DefaultHandler;
 19 | 
 20 | /**
 21 |  * @author Manthan Thakker
 22 |  * @project HW3
 23 |  * @date 2/19/18
 24 |  * @email thakker.m@husky.neu.edu
 25 |  */
 26 | public class ParserImpl implements Parser {
 27 |     private static Pattern namePattern;
 28 |     private static Pattern linkPattern;
 29 | 
 30 |     static {
 31 |         // Keep only html pages not containing tilde (~).
 32 |         namePattern = Pattern.compile("^([^~]+)$");
 33 |         // Keep only html filenames ending relative paths and not containing tilde (~).
 34 |         linkPattern = Pattern.compile("^\\..*/([^~]+)\\.html$");
 35 |     }
 36 | 
 37 |     public static void main(String[] args) {
 38 | 
 39 |         // Path of the file
 40 |         String path = "/Users/trailbrazer/Desktop/MR/HW3/input/wikipedia-simple-html.bz2";
 41 | 
 42 | 
 43 |         long count=0;
 44 | 
 45 |         BufferedReader reader = null;
 46 |         try {
 47 |             File inputFile = new File(path);
 48 |             if (!inputFile.exists() || inputFile.isDirectory() || !inputFile.getName().endsWith(".bz2")) {
 49 |                 System.out.println("Input File does not exist or not bz2 file: " + path);
 50 |                 System.exit(1);
 51 |             }
 52 | 
 53 |             // Configure parser.
 54 |             SAXParserFactory spf = SAXParserFactory.newInstance();
 55 |             spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
 56 |             SAXParser saxParser = spf.newSAXParser();
 57 |             XMLReader xmlReader = saxParser.getXMLReader();
 58 |             // Parser fills this list with linked page names.
 59 |             List<String> linkPageNames = new LinkedList();
 60 |             xmlReader.setContentHandler(new WikiParser(linkPageNames));
 61 | 
 62 |             BZip2CompressorInputStream inputStream = new BZip2CompressorInputStream(new FileInputStream(inputFile));
 63 |             reader = new BufferedReader(new InputStreamReader(inputStream));
 64 |             String line;
 65 | 
 66 |             while ((line = reader.readLine()) != null) {
 67 |                 count++;
 68 |                 // Each line formatted as (Wiki-page-name:Wiki-page-html).
 69 |                 int delimLoc = line.indexOf(':');
 70 |                 String page = line;
 71 |                 // replace & with &amp
 72 |                 line = line.replaceAll("&", "&amp;").trim();
 73 |                 String pageName = line.substring(0, delimLoc);
 74 |                 String html = line.substring(delimLoc + 1);
 75 |                 Matcher matcher = namePattern.matcher(pageName);
 76 |                 if (!matcher.find()) {
 77 |                     // Skip this html file, name contains (~).
 78 |                     continue;
 79 |                 }
 80 | 
 81 |                 // Parse page and fill list of linked pages.
 82 |                 linkPageNames.clear();
 83 |                 try {
 84 |                     xmlReader.parse(new InputSource(new StringReader(html)));
 85 |                 } catch (Exception e) {
 86 |                     // Discard ill-formatted pages.
 87 |                     continue;
 88 |                 }
 89 | 
 90 |             }
 91 | 
 92 |         }
 93 |         catch (EOFException e) {
 94 | 
 95 |         }catch (Exception e) {
 96 |             e.printStackTrace();
 97 |         }
 98 | 
 99 |         finally
100 |          {
101 |             try {
102 |                 reader.close();
103 |             } catch (IOException e) {
104 |                 e.printStackTrace();
105 |             }
106 |         }
107 | 
108 |     }
109 | 
110 |     /**
111 |      * Parses a Wikipage, finding links inside bodyContent div element.
112 |      */
113 |     private static class WikiParser extends DefaultHandler {
114 |         /**
115 |          * List of linked pages; filled by parser.
116 |          */
117 |         private List<String> linkPageNames;
118 |         /**
119 |          * Nesting depth inside bodyContent div element.
120 |          */
121 |         private int count = 0;
122 | 
123 |         public WikiParser(List<String> linkPageNames) {
124 |             super();
125 |             this.linkPageNames = linkPageNames;
126 |         }
127 | 
128 |         @Override
129 |         public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
130 |             super.startElement(uri, localName, qName, attributes);
131 |             if ("div".equalsIgnoreCase(qName) && "bodyContent".equalsIgnoreCase(attributes.getValue("id")) && count == 0) {
132 |                 // Beginning of bodyContent div element.
133 |                 count = 1;
134 |             } else if (count > 0 && "a".equalsIgnoreCase(qName)) {
135 |                 // Anchor tag inside bodyContent div element.
136 |                 count++;
137 |                 String link = attributes.getValue("href");
138 |                 if (link == null) {
139 |                     return;
140 |                 }
141 | 
142 |                 try {
143 |                     // Decode escaped characters in URL.
144 |                     link = URLDecoder.decode(link, "UTF-8");
145 |                 } catch (Exception e) {
146 |                     // Wiki-weirdness; use link as is.
147 |                 }
148 |                 // Keep only html filenames ending relative paths and not containing tilde (~).
149 |                 Matcher matcher = linkPattern.matcher(link);
150 |                 if (matcher.find()) {
151 |                     linkPageNames.add(matcher.group(1));
152 |                 }
153 |             } else if (count > 0) {
154 |                 // Other element inside bodyContent div.
155 |                 count++;
156 |             }
157 |         }
158 | 
159 |         @Override
160 |         public void endElement(String uri, String localName, String qName) throws SAXException {
161 |             super.endElement(uri, localName, qName);
162 |             if (count > 0) {
163 |                 // End of element inside bodyContent div.
164 |                 count--;
165 |             }
166 |         }
167 |     }
168 | 
169 | }
170 | 


--------------------------------------------------------------------------------
/hadoop/src/main/java/parserjob/ParserJob.java:
--------------------------------------------------------------------------------
  1 | package parserjob;
  2 | 
  3 | import enums.PageRankEnums;
  4 | import model.Node;
  5 | import org.apache.hadoop.conf.Configuration;
  6 | import org.apache.hadoop.fs.Path;
  7 | import org.apache.hadoop.io.LongWritable;
  8 | import org.apache.hadoop.io.Text;
  9 | import org.apache.hadoop.mapreduce.Job;
 10 | import org.apache.hadoop.mapreduce.Mapper;
 11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 12 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 13 | import org.xml.sax.Attributes;
 14 | import org.xml.sax.InputSource;
 15 | import org.xml.sax.SAXException;
 16 | import org.xml.sax.XMLReader;
 17 | import org.xml.sax.helpers.DefaultHandler;
 18 | 
 19 | import javax.xml.parsers.SAXParser;
 20 | import javax.xml.parsers.SAXParserFactory;
 21 | import java.io.IOException;
 22 | import java.io.StringReader;
 23 | import java.net.URLDecoder;
 24 | import java.util.*;
 25 | import java.util.regex.Matcher;
 26 | import java.util.regex.Pattern;
 27 | 
 28 | 
 29 | /**
 30 |  * @author Manthan Thakker
 31 |  * @project HW3
 32 |  * @date 2/20/18
 33 |  * @email thakker.m@husky.neu.edu
 34 |  */
 35 | public class ParserJob {
 36 | 
 37 |     private static Pattern namePattern;
 38 |     private static Pattern linkPattern;
 39 | 
 40 |     static {
 41 |         // Keep only html pages not containing tilde (~).
 42 |         namePattern = Pattern.compile("^([^~]+)$");
 43 |         // Keep only html filenames ending relative paths and not containing tilde (~).
 44 |         linkPattern = Pattern.compile("^\\..*/([^~]+)\\.html$");
 45 |     }
 46 | 
 47 | 
 48 |     public static class ParserMapper extends Mapper<LongWritable, Text, Text, Node> {
 49 | 
 50 |         private Set uniquePages;
 51 | 
 52 |         public void setup(Context context) {
 53 |             uniquePages = new HashSet();
 54 | 
 55 |         }
 56 | 
 57 |         public void map(LongWritable key, Text node, Context context) throws IOException, InterruptedException {
 58 | 
 59 | 
 60 |             try {
 61 |                 // Configure parser.
 62 |                 SAXParserFactory spf = SAXParserFactory.newInstance();
 63 |                 spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
 64 |                 SAXParser saxParser = spf.newSAXParser();
 65 |                 XMLReader xmlReader = saxParser.getXMLReader();
 66 |                 // Parser fills this list with linked page names.
 67 |                 List<String> linkPageNames = new LinkedList<String>();
 68 |                 xmlReader.setContentHandler(new WikiParser(linkPageNames));
 69 | 
 70 | 
 71 |                 String line = node.toString();
 72 | 
 73 |                 Text keyToEmit = new Text();
 74 | 
 75 | 
 76 |                 // Each line formatted as (Wiki-page-name:Wiki-page-html).
 77 |                 int delimLoc = line.indexOf(':');
 78 |                 String page = line;
 79 |                 // replace & with &amp
 80 |                 line = line.replaceAll("&", "&amp;").trim();
 81 |                 String pageName = line.substring(0, delimLoc);
 82 |                 String html = line.substring(delimLoc + 1);
 83 |                 Matcher matcher = namePattern.matcher(pageName);
 84 |                 if (!matcher.find()) {
 85 |                     // Skip this html file, name contains (~).
 86 |                     return;
 87 |                 }
 88 | 
 89 | 
 90 |                 // Parse page and fill list of linked pages.
 91 |                 linkPageNames.clear();
 92 |                 try {
 93 |                     xmlReader.parse(new InputSource(new StringReader(html)));
 94 |                 } catch (Exception e) {
 95 |                     // Discard ill-formatted pages.
 96 |                     return;
 97 |                 }
 98 | 
 99 |                 Node newNode = new Node(pageName);
100 | 
101 |                 Set pagesSet=new HashSet();
102 |                 pagesSet.addAll(linkPageNames);
103 | 
104 |                 newNode.neighbors = new LinkedList<String>(pagesSet);
105 |                 uniquePages.add(pageName);
106 |                 newNode.pageRank=-1.0;
107 |                 keyToEmit.set(pageName);
108 |                 context.write(keyToEmit, newNode);
109 | 
110 | 
111 |             } catch (Exception e) {
112 |                 e.printStackTrace();
113 |             }
114 |         }
115 | 
116 |         public void cleanup(Context context) {
117 |             context.getCounter(PageRankEnums.UNIQUEPAGES).increment(uniquePages.size());
118 |         }
119 | 
120 |         /**
121 |          * Parses a Wikipage, finding links inside bodyContent div element.
122 |          */
123 |         private static class WikiParser extends DefaultHandler {
124 |             /**
125 |              * List of linked pages; filled by parser.
126 |              */
127 |             private List<String> linkPageNames;
128 |             /**
129 |              * Nesting depth inside bodyContent div element.
130 |              */
131 |             private int count = 0;
132 | 
133 |             public WikiParser(List<String> linkPageNames) {
134 |                 super();
135 |                 this.linkPageNames = linkPageNames;
136 |             }
137 | 
138 |             @Override
139 |             public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
140 |                 super.startElement(uri, localName, qName, attributes);
141 |                 if ("div".equalsIgnoreCase(qName) && "bodyContent".equalsIgnoreCase(attributes.getValue("id")) && count == 0) {
142 |                     // Beginning of bodyContent div element.
143 |                     count = 1;
144 |                 } else if (count > 0 && "a".equalsIgnoreCase(qName)) {
145 |                     // Anchor tag inside bodyContent div element.
146 |                     count++;
147 |                     String link = attributes.getValue("href");
148 |                     if (link == null) {
149 |                         return;
150 |                     }
151 | 
152 |                     try {
153 |                         // Decode escaped characters in URL.
154 |                         link = URLDecoder.decode(link, "UTF-8");
155 |                     } catch (Exception e) {
156 |                         // Wiki-weirdness; use link as is.
157 |                     }
158 |                     // Keep only html filenames ending relative paths and not containing tilde (~).
159 |                     Matcher matcher = linkPattern.matcher(link);
160 |                     if (matcher.find()) {
161 |                         linkPageNames.add(matcher.group(1));
162 |                     }
163 |                 } else if (count > 0) {
164 |                     // Other element inside bodyContent div.
165 |                     count++;
166 |                 }
167 |             }
168 | 
169 |             @Override
170 |             public void endElement(String uri, String localName, String qName) throws SAXException {
171 |                 super.endElement(uri, localName, qName);
172 |                 if (count > 0) {
173 |                     // End of element inside bodyContent div.
174 |                     count--;
175 |                 }
176 |             }
177 |         }
178 |     }
179 | 
180 | 
181 |     public static long main(String[] args) throws Exception {
182 | 
183 |         Configuration conf = new Configuration();
184 |         Job job = Job.getInstance(conf, "Parser");
185 | 
186 |         // Setup
187 |         job.setJarByClass(ParserJob.class);
188 |         job.setMapperClass(ParserMapper.class);
189 | 
190 | 
191 |         //Mapper
192 |         job.setMapOutputKeyClass(Text.class);
193 |         job.setMapOutputValueClass(Node.class);
194 | 
195 | 
196 |         FileInputFormat.addInputPath(job, new Path(args[0]));
197 |         FileOutputFormat.setOutputPath(job, new Path(args[1]+"/0"));
198 |         job.waitForCompletion(true);
199 |         return job.getCounters().findCounter(PageRankEnums.UNIQUEPAGES).getValue();
200 | 
201 | 
202 |     }
203 | }
204 | 


--------------------------------------------------------------------------------
/hadoop/HW3.iml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
  3 |   <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_5">
  4 |     <output url="file://$MODULE_DIR$/target/classes" />
  5 |     <output-test url="file://$MODULE_DIR$/target/test-classes" />
  6 |     <content url="file://$MODULE_DIR$">
  7 |       <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
  8 |       <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
  9 |       <excludeFolder url="file://$MODULE_DIR$/target" />
 10 |     </content>
 11 |     <orderEntry type="inheritedJdk" />
 12 |     <orderEntry type="sourceFolder" forTests="false" />
 13 |     <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-common:2.7.3" level="project" />
 14 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-annotations:2.7.3" level="project" />
 15 |     <orderEntry type="module-library">
 16 |       <library name="Maven: jdk.tools:jdk.tools:1.8">
 17 |         <CLASSES>
 18 |           <root url="jar:///Library/Java/JavaVirtualMachines/jdk1.8.0_161.jdk/Contents/Home/lib/tools.jar!/" />
 19 |         </CLASSES>
 20 |         <JAVADOC />
 21 |         <SOURCES />
 22 |       </library>
 23 |     </orderEntry>
 24 |     <orderEntry type="library" name="Maven: com.google.guava:guava:11.0.2" level="project" />
 25 |     <orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
 26 |     <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.commons:commons-math3:3.1.1" level="project" />
 27 |     <orderEntry type="library" name="Maven: xmlenc:xmlenc:0.52" level="project" />
 28 |     <orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" />
 29 |     <orderEntry type="library" name="Maven: commons-codec:commons-codec:1.4" level="project" />
 30 |     <orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
 31 |     <orderEntry type="library" name="Maven: commons-net:commons-net:3.1" level="project" />
 32 |     <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.2" level="project" />
 33 |     <orderEntry type="library" scope="PROVIDED" name="Maven: javax.servlet:servlet-api:2.5" level="project" />
 34 |     <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty:6.1.26" level="project" />
 35 |     <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-util:6.1.26" level="project" />
 36 |     <orderEntry type="library" scope="PROVIDED" name="Maven: javax.servlet.jsp:jsp-api:2.1" level="project" />
 37 |     <orderEntry type="library" name="Maven: com.sun.jersey:jersey-core:1.9" level="project" />
 38 |     <orderEntry type="library" name="Maven: com.sun.jersey:jersey-json:1.9" level="project" />
 39 |     <orderEntry type="library" name="Maven: org.codehaus.jettison:jettison:1.1" level="project" />
 40 |     <orderEntry type="library" name="Maven: com.sun.xml.bind:jaxb-impl:2.2.3-1" level="project" />
 41 |     <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-jaxrs:1.8.3" level="project" />
 42 |     <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-xc:1.8.3" level="project" />
 43 |     <orderEntry type="library" name="Maven: com.sun.jersey:jersey-server:1.9" level="project" />
 44 |     <orderEntry type="library" name="Maven: asm:asm:3.1" level="project" />
 45 |     <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.3" level="project" />
 46 |     <orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
 47 |     <orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.9.0" level="project" />
 48 |     <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.1.2" level="project" />
 49 |     <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.1.2" level="project" />
 50 |     <orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:0.4" level="project" />
 51 |     <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" />
 52 |     <orderEntry type="library" name="Maven: commons-configuration:commons-configuration:1.6" level="project" />
 53 |     <orderEntry type="library" name="Maven: commons-digester:commons-digester:1.8" level="project" />
 54 |     <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.7.0" level="project" />
 55 |     <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils-core:1.8.0" level="project" />
 56 |     <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.10" level="project" />
 57 |     <orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.10" level="project" />
 58 |     <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-core-asl:1.9.13" level="project" />
 59 |     <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-mapper-asl:1.9.13" level="project" />
 60 |     <orderEntry type="library" name="Maven: org.apache.avro:avro:1.7.4" level="project" />
 61 |     <orderEntry type="library" name="Maven: com.thoughtworks.paranamer:paranamer:2.3" level="project" />
 62 |     <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.0.4.1" level="project" />
 63 |     <orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:2.5.0" level="project" />
 64 |     <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.code.gson:gson:2.2.4" level="project" />
 65 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-auth:2.7.3" level="project" />
 66 |     <orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15" level="project" />
 67 |     <orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-i18n:2.0.0-M15" level="project" />
 68 |     <orderEntry type="library" name="Maven: org.apache.directory.api:api-asn1-api:1.0.0-M20" level="project" />
 69 |     <orderEntry type="library" name="Maven: org.apache.directory.api:api-util:1.0.0-M20" level="project" />
 70 |     <orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.7.1" level="project" />
 71 |     <orderEntry type="library" scope="PROVIDED" name="Maven: com.jcraft:jsch:0.1.42" level="project" />
 72 |     <orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.7.1" level="project" />
 73 |     <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.curator:curator-recipes:2.7.1" level="project" />
 74 |     <orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:3.0.0" level="project" />
 75 |     <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.htrace:htrace-core:3.1.0-incubating" level="project" />
 76 |     <orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.6" level="project" />
 77 |     <orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.4.1" level="project" />
 78 |     <orderEntry type="library" name="Maven: org.tukaani:xz:1.0" level="project" />
 79 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-core:3.0.0" level="project" />
 80 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-client:3.0.0" level="project" />
 81 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-common:3.0.0" level="project" />
 82 |     <orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.2.11" level="project" />
 83 |     <orderEntry type="library" name="Maven: javax.servlet:javax.servlet-api:3.1.0" level="project" />
 84 |     <orderEntry type="library" name="Maven: org.eclipse.jetty:jetty-util:9.3.19.v20170502" level="project" />
 85 |     <orderEntry type="library" name="Maven: com.sun.jersey:jersey-client:1.19" level="project" />
 86 |     <orderEntry type="library" name="Maven: com.google.inject:guice:4.0" level="project" />
 87 |     <orderEntry type="library" name="Maven: javax.inject:javax.inject:1" level="project" />
 88 |     <orderEntry type="library" name="Maven: aopalliance:aopalliance:1.0" level="project" />
 89 |     <orderEntry type="library" name="Maven: com.sun.jersey.contribs:jersey-guice:1.19" level="project" />
 90 |     <orderEntry type="library" name="Maven: com.sun.jersey:jersey-servlet:1.19" level="project" />
 91 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.7.8" level="project" />
 92 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.7.8" level="project" />
 93 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.7.8" level="project" />
 94 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.7.8" level="project" />
 95 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-hdfs-client:3.0.0" level="project" />
 96 |     <orderEntry type="library" name="Maven: com.squareup.okhttp:okhttp:2.4.0" level="project" />
 97 |     <orderEntry type="library" name="Maven: com.squareup.okio:okio:1.4.0" level="project" />
 98 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.7.8" level="project" />
 99 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.7.8" level="project" />
100 |     <orderEntry type="library" name="Maven: com.google.inject.extensions:guice-servlet:4.0" level="project" />
101 |     <orderEntry type="library" name="Maven: io.netty:netty:3.10.5.Final" level="project" />
102 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-client:2.2.0" level="project" />
103 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-hdfs:2.2.0" level="project" />
104 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-app:2.2.0" level="project" />
105 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-common:2.2.0" level="project" />
106 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-server-common:2.2.0" level="project" />
107 |     <orderEntry type="library" name="Maven: com.sun.jersey.jersey-test-framework:jersey-test-framework-grizzly2:1.9" level="project" />
108 |     <orderEntry type="library" name="Maven: com.sun.jersey.jersey-test-framework:jersey-test-framework-core:1.9" level="project" />
109 |     <orderEntry type="library" name="Maven: com.sun.jersey:jersey-grizzly2:1.9" level="project" />
110 |     <orderEntry type="library" name="Maven: org.glassfish.grizzly:grizzly-http:2.1.2" level="project" />
111 |     <orderEntry type="library" name="Maven: org.glassfish.grizzly:grizzly-framework:2.1.2" level="project" />
112 |     <orderEntry type="library" name="Maven: org.glassfish.gmbal:gmbal-api-only:3.0.0-b023" level="project" />
113 |     <orderEntry type="library" name="Maven: org.glassfish.external:management-api:3.0.0-b012" level="project" />
114 |     <orderEntry type="library" name="Maven: org.glassfish.grizzly:grizzly-http-server:2.1.2" level="project" />
115 |     <orderEntry type="library" name="Maven: org.glassfish.grizzly:grizzly-rcm:2.1.2" level="project" />
116 |     <orderEntry type="library" name="Maven: org.glassfish.grizzly:grizzly-http-servlet:2.1.2" level="project" />
117 |     <orderEntry type="library" name="Maven: org.glassfish:javax.servlet:3.1" level="project" />
118 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-shuffle:2.2.0" level="project" />
119 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-api:2.2.0" level="project" />
120 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.2.0" level="project" />
121 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-core:1.2.1" level="project" />
122 |     <orderEntry type="library" name="Maven: org.apache.commons:commons-math:2.1" level="project" />
123 |     <orderEntry type="library" name="Maven: tomcat:jasper-runtime:5.5.12" level="project" />
124 |     <orderEntry type="library" name="Maven: tomcat:jasper-compiler:5.5.12" level="project" />
125 |     <orderEntry type="library" name="Maven: org.mortbay.jetty:jsp-api-2.1:6.1.14" level="project" />
126 |     <orderEntry type="library" name="Maven: org.mortbay.jetty:servlet-api-2.5:6.1.14" level="project" />
127 |     <orderEntry type="library" name="Maven: org.mortbay.jetty:jsp-2.1:6.1.14" level="project" />
128 |     <orderEntry type="library" name="Maven: ant:ant:1.6.5" level="project" />
129 |     <orderEntry type="library" name="Maven: commons-el:commons-el:1.0" level="project" />
130 |     <orderEntry type="library" name="Maven: hsqldb:hsqldb:1.8.0.10" level="project" />
131 |     <orderEntry type="library" name="Maven: oro:oro:2.0.8" level="project" />
132 |     <orderEntry type="library" name="Maven: org.eclipse.jdt:core:3.1.1" level="project" />
133 |     <orderEntry type="library" scope="TEST" name="Maven: junit:junit:3.8.1" level="project" />
134 |   </component>
135 | </module>


--------------------------------------------------------------------------------