├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── images
├── index.png
├── reverseindex.jpg
├── 分区.jpg
└── 简单模型.jpg
├── resources
├── comments.xml
├── friendsdata.txt
├── itemcf.csv
├── order.txt
├── people.csv
├── product.txt
└── rand.sh
└── src
└── main
└── java
├── InputOutputFormatTest
└── MultiInOutput.java
├── InvertedIndex
├── InvertedCombiner.java
├── InvertedJob.java
├── InvertedMapper.java
└── InvertedReducer.java
├── gradesAverage
└── GradesAverage.java
├── mapReduceTest
└── wordCount
│ └── WordCount.java
├── mapreduceProgram
├── DateSortAsc.java
├── DateSortDesc.java
├── FlowPartition.java
├── FlowSort.java
├── FlowStatistics.java
└── GroupMax.java
├── mergeMultipleFiles
├── MergeJob.java
├── MergeMapper.java
├── MyInputFormat.java
└── MyRecordReader.java
├── mutualFriend
├── DecomposeFriendsMapper.java
├── DecomposeFriendsReducer.java
├── JobControlRun.java
├── JobRun.java
├── MergeFriendsMapper.java
└── MergeFriendsReducer.java
├── shuffleTest
├── MonthAscTempDescSort.java
└── TempSort.java
├── ssdut
└── training
│ └── mapreduce
│ ├── counter
│ └── YearCounter.java
│ ├── datecount
│ ├── DateCount.java
│ ├── DateDistinct.java
│ ├── DateFilter.java
│ ├── DateGroup.java
│ ├── DateGroup2.java
│ ├── DatePartition.java
│ ├── DatePartition2.java
│ ├── DateSort.java
│ ├── DateSort2.java
│ └── DateSort3.java
│ ├── inputformat
│ ├── FixedLengthInput.java
│ ├── FixedLengthInput2.java
│ ├── KeyValueInput.java
│ ├── MultInput.java
│ ├── MultInput2.java
│ ├── NLineInput.java
│ └── SequenceInput.java
│ ├── itemcf
│ ├── StartRun.java
│ ├── Step1.java
│ ├── Step2.java
│ ├── Step3.java
│ ├── Step4.java
│ ├── Step5.java
│ └── Step6.java
│ ├── medianstddev
│ ├── MRDPUtils.java
│ ├── MedianStdDevJob.java
│ ├── MedianStdDevMapper.java
│ ├── MedianStdDevReducer.java
│ └── MedianStdDevTuple.java
│ ├── minmaxcount
│ ├── MRDPUtils.java
│ ├── MinMaxCountJob.java
│ ├── MinMaxCountMapper.java
│ ├── MinMaxCountReducer.java
│ └── MinMaxCountTuple.java
│ ├── output
│ ├── CompressOutput.java
│ └── MultOutput.java
│ ├── peoplerank
│ ├── People.java
│ ├── PeopleRank.java
│ └── PeopleRank2.java
│ └── topten
│ ├── TopTenJob.java
│ ├── TopTenMapper.java
│ └── TopTenReducer.java
└── weblog
├── FlowCount.java
├── IPCount.java
├── Missed.java
├── PVMinMax.java
├── PVMinMax2.java
└── PVTopTen.java
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled class file
2 | *.class
3 |
4 | # Log file
5 | *.log
6 |
7 | # BlueJ files
8 | *.ctxt
9 |
10 | # Mobile Tools for Java (J2ME)
11 | .mtj.tmp/
12 |
13 | # Package Files #
14 | *.jar
15 | *.war
16 | *.nar
17 | *.ear
18 | *.zip
19 | *.tar.gz
20 | *.rar
21 |
22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
23 | hs_err_pid*
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 josonle
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/images/index.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josonle/MapReduce-Demo/2f057a5add4f623804f7c102a8ac16c7a52ad946/images/index.png
--------------------------------------------------------------------------------
/images/reverseindex.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josonle/MapReduce-Demo/2f057a5add4f623804f7c102a8ac16c7a52ad946/images/reverseindex.jpg
--------------------------------------------------------------------------------
/images/分区.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josonle/MapReduce-Demo/2f057a5add4f623804f7c102a8ac16c7a52ad946/images/分区.jpg
--------------------------------------------------------------------------------
/images/简单模型.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josonle/MapReduce-Demo/2f057a5add4f623804f7c102a8ac16c7a52ad946/images/简单模型.jpg
--------------------------------------------------------------------------------
/resources/comments.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/resources/friendsdata.txt:
--------------------------------------------------------------------------------
1 | A:B,C,D,F,E,O
2 | B:A,C,E,K
3 | C:F,A,D,I
4 | D:A,E,F,L
5 | E:B,C,D,M,L
6 | F:A,B,C,D,E,O,M
7 | G:A,C,D,E,F
8 | H:A,C,D,E,O
9 | I:A,O
10 | J:B,O
11 | K:A,C,D
12 | L:D,E,F
13 | M:E,F,G
14 | O:A,H,I,J
--------------------------------------------------------------------------------
/resources/order.txt:
--------------------------------------------------------------------------------
1 | 1001 20150710 P0001 2
2 | 1002 20150710 P0001 3
3 | 1002 20150710 P0002 3
4 | 1003 20150710 P0003 3
--------------------------------------------------------------------------------
/resources/people.csv:
--------------------------------------------------------------------------------
1 | a,b
2 | a,c
3 | a,d
4 | b,a
5 | b,d
6 | c,a
7 | d,b
8 | d,c
9 |
--------------------------------------------------------------------------------
/resources/product.txt:
--------------------------------------------------------------------------------
1 | P0001 小米5 1001 2
2 | P0002 锤子T1 1000 3
3 | P0003 锤子 1002 4
--------------------------------------------------------------------------------
/resources/rand.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Generate random "YYYY-MM-DD:N" records, one per line.
#
# Usage: rand.sh [count]
#   count  number of records to print (default: 100). Must be a
#          non-negative decimal integer.
#
# Each record is a random calendar date in the range 2015-01-01 .. 2017-12-31,
# followed by ":" and the 1-based sequence number of the record.

num=${1:-100}

# Reject anything that is not a plain decimal number before it reaches an
# arithmetic context: bash arithmetic would otherwise evaluate arbitrary
# expressions (or silently treat an unknown word as 0).
if ! [[ $num =~ ^[0-9]+$ ]]; then
    echo "usage: $0 [count]  (count must be a non-negative integer)" >&2
    exit 1
fi

# Force base 10 so that a count such as "08" is not parsed as invalid octal.
num=$((10#$num))

for ((i = 1; i <= num; i++)); do
    year=$((RANDOM % 3 + 2015))
    month=$((RANDOM % 12 + 1))

    # Pick a day that is valid for the chosen month.
    case $month in
        1 | 3 | 5 | 7 | 8 | 10 | 12)
            day=$((RANDOM % 31 + 1))
            ;;
        2)
            # 2016 is the only leap year in the generated 2015-2017 range.
            if ((year == 2016)); then
                day=$((RANDOM % 29 + 1))
            else
                day=$((RANDOM % 28 + 1))
            fi
            ;;
        4 | 6 | 9 | 11)
            day=$((RANDOM % 30 + 1))
            ;;
    esac

    # Month and day are zero-padded to two digits.
    printf '%d-%02d-%02d:%d\n' "$year" "$month" "$day" "$i"
done
39 |
--------------------------------------------------------------------------------
/src/main/java/InputOutputFormatTest/MultiInOutput.java:
--------------------------------------------------------------------------------
1 | package InputOutputFormatTest;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.conf.Configuration;
6 | import org.apache.hadoop.fs.FileSystem;
7 | import org.apache.hadoop.fs.Path;
8 | import org.apache.hadoop.mapreduce.Job;
9 | import org.apache.hadoop.mapreduce.Mapper;
10 | import org.apache.hadoop.mapreduce.Reducer;
11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
12 | import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
13 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
14 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
15 | import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
16 | import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
17 | import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
18 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
19 |
20 | import ssdut.training.mapreduce.output.MultOutput;
21 | import ssdut.training.mapreduce.output.MultOutput.MultOutputMapper;
22 | import ssdut.training.mapreduce.output.MultOutput.MultOutputReducer;
23 |
24 | import org.apache.hadoop.io.IntWritable;
25 | import org.apache.hadoop.io.Text;
26 |
27 | public class MultiInOutput {
28 | public static class TxtFileMapper extends Mapper