└── src └── wikibooks └── hadoop ├── chapter10 ├── WordCount.java └── DepartureDelayCount.java ├── chapter05 ├── DelayCount.java ├── ArrivalDelayCount.java ├── DelayCountMapper.java ├── DepartureDelayCount.java ├── DelayCountWithCounter.java ├── ArrivalDelayCountMapper.java ├── DelayCountMapperWithCounter.java ├── DepartureDelayCountMapper.java ├── DelayCountWithMultipleOutputs.java ├── DelayCountMapperWithMultipleOutputs.java ├── DelayCountReducerWithMultipleOutputs.java ├── DelayCounters.java └── DelayCountReducer.java ├── chapter07 ├── MapsideJoin.java ├── ReducesideJoin.java ├── CarrierCodeMapper.java ├── MapperWithMapsideJoin.java ├── MapperWithReducesideJoin.java └── ReducerWithReducesideJoin.java ├── chapter06 ├── MapFileCreator.java ├── DateKeyComparator.java ├── SearchValueList.java ├── GroupKeyComparator.java ├── SequenceFileCreator.java ├── DelayCountWithDateKey.java ├── SequenceFileTotalSort.java ├── DelayCountMapperWithDateKey.java ├── DelayCountReducerWithDateKey.java ├── GroupKeyPartitioner.java └── DateKey.java ├── chapter03 └── SingleFileWriteRead.java ├── chapter08 ├── ArrivalDelayCountWithGzip.java ├── ArrivalDelayCountWithSnappy.java └── ArrivalDelayCountWithCombiner.java └── chapter04 ├── WordCountReducer.java ├── WordCountMapper.java └── WordCount.java /src/wikibooks/hadoop/chapter10/WordCount.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter10/WordCount.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCount.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DelayCount.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter07/MapsideJoin.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter07/MapsideJoin.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/MapFileCreator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/MapFileCreator.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter07/ReducesideJoin.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter07/ReducesideJoin.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/ArrivalDelayCount.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/ArrivalDelayCount.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCountMapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DelayCountMapper.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/DateKeyComparator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/DateKeyComparator.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/SearchValueList.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/SearchValueList.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter07/CarrierCodeMapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter07/CarrierCodeMapper.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter03/SingleFileWriteRead.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter03/SingleFileWriteRead.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DepartureDelayCount.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DepartureDelayCount.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/GroupKeyComparator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/GroupKeyComparator.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/SequenceFileCreator.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/SequenceFileCreator.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter10/DepartureDelayCount.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter10/DepartureDelayCount.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCountWithCounter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DelayCountWithCounter.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/DelayCountWithDateKey.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/DelayCountWithDateKey.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/SequenceFileTotalSort.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/SequenceFileTotalSort.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter07/MapperWithMapsideJoin.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter07/MapperWithMapsideJoin.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/ArrivalDelayCountMapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/ArrivalDelayCountMapper.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter07/MapperWithReducesideJoin.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter07/MapperWithReducesideJoin.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCountMapperWithCounter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DelayCountMapperWithCounter.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DepartureDelayCountMapper.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DepartureDelayCountMapper.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/DelayCountMapperWithDateKey.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/DelayCountMapperWithDateKey.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter07/ReducerWithReducesideJoin.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter07/ReducerWithReducesideJoin.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter08/ArrivalDelayCountWithGzip.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter08/ArrivalDelayCountWithGzip.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter08/ArrivalDelayCountWithSnappy.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter08/ArrivalDelayCountWithSnappy.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCountWithMultipleOutputs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DelayCountWithMultipleOutputs.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/DelayCountReducerWithDateKey.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter06/DelayCountReducerWithDateKey.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter08/ArrivalDelayCountWithCombiner.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter08/ArrivalDelayCountWithCombiner.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCountMapperWithMultipleOutputs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DelayCountMapperWithMultipleOutputs.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCountReducerWithMultipleOutputs.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/beginninghadoop/master/src/wikibooks/hadoop/chapter05/DelayCountReducerWithMultipleOutputs.java -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCounters.java: -------------------------------------------------------------------------------- 1 | package wikibooks.hadoop.chapter05; 2 | 3 | public enum DelayCounters { 4 | not_available_arrival, scheduled_arrival, early_arrival, not_available_departure, scheduled_departure, early_departure; 5 | } 6 | -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/GroupKeyPartitioner.java: -------------------------------------------------------------------------------- 1 | package wikibooks.hadoop.chapter06; 2 | 3 | import org.apache.hadoop.io.IntWritable; 4 | import org.apache.hadoop.mapreduce.Partitioner; 5 | 6 | public class GroupKeyPartitioner extends Partitioner { 7 | 8 | @Override 9 | public int getPartition(DateKey key, IntWritable val, int numPartitions) { 10 | int hash = key.getYear().hashCode(); 11 | int partition = hash % numPartitions; 12 | return partition; 13 | } 14 | } -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter04/WordCountReducer.java: -------------------------------------------------------------------------------- 1 | package wikibooks.hadoop.chapter04; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.IntWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Reducer; 8 | 9 | public class WordCountReducer extends 10 | Reducer { 11 | private IntWritable result = new IntWritable(); 12 | 13 | public void reduce(Text key, Iterable values, Context context) 14 | throws IOException, InterruptedException { 15 | int sum = 0; 16 | for (IntWritable val : values) { 17 | sum += val.get(); 18 | } 19 | result.set(sum); 20 | context.write(key, result); 21 | } 22 | } -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter05/DelayCountReducer.java: -------------------------------------------------------------------------------- 1 | package wikibooks.hadoop.chapter05; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.IntWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Reducer; 8 | 9 | public class DelayCountReducer extends 10 | Reducer { 11 | 12 | private IntWritable result = new IntWritable(); 13 | 14 | public void reduce(Text key, Iterable values, Context context) 15 | throws IOException, InterruptedException { 16 | int sum = 0; 17 | for (IntWritable value : values) 18 | sum += value.get(); 19 | result.set(sum); 20 | context.write(key, result); 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter04/WordCountMapper.java: -------------------------------------------------------------------------------- 1 | package wikibooks.hadoop.chapter04; 2 | 3 | import java.io.IOException; 4 | import java.util.StringTokenizer; 5 | 6 | import org.apache.hadoop.io.IntWritable; 7 | import org.apache.hadoop.io.LongWritable; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Mapper; 10 | 11 | public class WordCountMapper extends 12 | Mapper { 13 | 14 | private final static IntWritable one = new IntWritable(1); 15 | private Text word = new Text(); 16 | 17 | public void map(LongWritable key, Text value, Context context) 18 | throws IOException, InterruptedException { 19 | StringTokenizer itr = new StringTokenizer(value.toString()); 20 | while (itr.hasMoreTokens()) { 21 | word.set(itr.nextToken()); 22 | context.write(word, one); 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter04/WordCount.java: -------------------------------------------------------------------------------- 1 | package wikibooks.hadoop.chapter04; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.IntWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Job; 8 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 9 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 10 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 12 | 13 | public class WordCount { 14 | public static void main(String[] args) throws Exception { 15 | Configuration conf = new Configuration(); 16 | if (args.length != 2) { 17 | System.err.println("Usage: WordCount "); 18 | System.exit(2); 19 | } 20 | Job job = new Job(conf, "WordCount"); 21 | 22 | job.setJarByClass(WordCount.class); 23 | job.setMapperClass(WordCountMapper.class); 24 | job.setReducerClass(WordCountReducer.class); 25 | 26 | job.setInputFormatClass(TextInputFormat.class); 27 | job.setOutputFormatClass(TextOutputFormat.class); 28 | 29 | job.setOutputKeyClass(Text.class); 30 | job.setOutputValueClass(IntWritable.class); 31 | 32 | FileInputFormat.addInputPath(job, new Path(args[0])); 33 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 34 | 35 | job.waitForCompletion(true); 36 | } 37 | } -------------------------------------------------------------------------------- /src/wikibooks/hadoop/chapter06/DateKey.java: -------------------------------------------------------------------------------- 1 | package wikibooks.hadoop.chapter06; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import java.io.IOException; 6 | 7 | import org.apache.hadoop.io.WritableComparable; 8 | import org.apache.hadoop.io.WritableUtils; 9 | 10 | public class DateKey implements WritableComparable { 11 | 12 | private String year; 13 | private Integer month; 14 | 15 | public DateKey() { 16 | } 17 | 18 | public DateKey(String year, Integer date) { 19 | this.year = year; 20 | this.month = date; 21 | } 22 | 23 | public String getYear() { 24 | return year; 25 | } 26 | 27 | public void setYear(String year) { 28 | this.year = year; 29 | } 30 | 31 | public Integer getMonth() { 32 | return month; 33 | } 34 | 35 | public void setMonth(Integer month) { 36 | this.month = month; 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | return (new StringBuilder()).append(year).append(",").append(month) 42 | .toString(); 43 | } 44 | 45 | @Override 46 | public void readFields(DataInput in) throws IOException { 47 | year = WritableUtils.readString(in); 48 | month = in.readInt(); 49 | } 50 | 51 | @Override 52 | public void write(DataOutput out) throws IOException { 53 | WritableUtils.writeString(out, year); 54 | out.writeInt(month); 55 | } 56 | 57 | @Override 58 | public int compareTo(DateKey key) { 59 | int result = year.compareTo(key.year); 60 | if (0 == result) { 61 | result = month.compareTo(key.month); 62 | } 63 | return result; 64 | // if (this.year.compareTo(key.year) != 0) { 65 | // return this.year.compareTo(key.year); 66 | // } else if (this.distance != key.distance) { 67 | // return this.distance < key.distance ? -1 : 1; 68 | // } else { 69 | // return 0; 70 | // } 71 | } 72 | 73 | } 74 | --------------------------------------------------------------------------------