├── .gitattributes ├── .gitignore ├── Hadoop ├── README.md ├── WebKPI │ ├── KPIJob.java │ ├── KPI_OneIP_Sum.java │ ├── KPI_OnePV_Sum.java │ ├── KPI_OneRequest_Sum.java │ ├── KPI_OneSource_Sum.java │ ├── KPI_OneTime_Sum.java │ ├── KPIfilter.java │ └── hdfsGYT.java ├── bookTuijian │ ├── Step1.java │ ├── Step2.java │ ├── Step3_1.java │ ├── Step3_2.java │ ├── Step4.java │ ├── Step4_Updata.java │ ├── Step4_Updata2.java │ ├── bookRecommend.java │ ├── hdfsGYT.java │ ├── score.txt │ └── uid_to_bid.csv ├── hdfs │ └── hdfsGYT.java ├── multiple_In_Out │ ├── mulIn-output │ │ └── part-r-00000 │ ├── mulOut-output │ │ ├── china-r-00000 │ │ ├── cpeople-r-00000 │ │ └── usa-r-00000 │ ├── multipleinout.java │ ├── multipleinput_input │ ├── multipleoutput.java │ └── multipleoutput_input ├── pagerankjisuan │ ├── dataEtl.java │ ├── hdfsGYT.java │ ├── prJisuan.java │ ├── prMatrix.java │ ├── prNormal.java │ ├── prSort.java │ └── prjob.java ├── selfSort │ ├── input │ ├── output │ └── selfSort.java ├── sort_twice │ ├── Intpair.java │ ├── groupingComparator.java │ ├── input │ ├── myPartition.java │ ├── output │ ├── sort_twice.jar │ └── sort_twice.java ├── wordcount │ └── wordcount.java └── 二次排序 │ ├── blogURL.txt │ ├── part-r-00000 │ ├── sortTwice.jar │ ├── sortTwice.txt │ └── sorttwice │ ├── IntPair.java │ └── sortTwice.java ├── Hbase └── README.md ├── Hive ├── README.md └── hiveTableExample │ ├── complex_student │ ├── complex_student~ │ ├── external_student │ ├── hiveQL │ ├── hiveQL~ │ ├── partiton_student │ ├── partiton_student 2 │ ├── partiton_student 2~ │ ├── partiton_student2 │ ├── partiton_student~ │ ├── student.txt │ └── student.txt~ ├── Java ├── Dataguru算法导论 │ ├── BitTree │ │ └── tree.java │ ├── Graph │ │ ├── BFS.java │ │ ├── DFS.java │ │ ├── Dijkstra.java │ │ └── GraphTest.java │ ├── Hash │ │ └── hash.java │ ├── Link │ │ ├── DoubleLink.java │ │ ├── DoubleLinkTest.java │ │ ├── Link.java │ │ └── linkTest.java │ ├── Matrix │ │ ├── matrixCheng.java │ │ └── maxArr.java │ ├── Queue │ │ └── Queue.java │ ├── Statck │ │ ├── Statck1.java │ │ └── Statck2.java │ ├── TestCode │ │ ├── BitTreeExample.java │ │ ├── HashTableExample.java │ │ ├── fenZhiTest.java │ │ └── guibingTest.java │ └── sort │ │ ├── duiSort.java │ │ ├── guibing.java │ │ ├── insertSort.java │ │ └── quickSort.java └── 一些小项目 │ └── README.md ├── Mahout └── README.md ├── README.md ├── Spark ├── ChineseWordSplitCount │ ├── WordAnalyzer jar包链接.txt │ ├── blog href.txt │ └── wordSplitCount.py ├── PageRank │ └── Jar包链接.txt ├── README.md └── pairRDD │ ├── driver │ ├── example │ ├── example~ │ └── sample └── cluster_conf ├── README.md ├── master1 ├── core-site.xml ├── hdfs-site.xml ├── mapred-site.xml └── yarn-site.xml ├── slave1 ├── core-site.xml ├── hdfs-site.xml ├── mapred-site.xml └── yarn-site.xml └── slave2 ├── core-site.xml ├── hdfs-site.xml ├── mapred-site.xml └── yarn-site.xml /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: 
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 | 
17 | # Windows shortcuts
18 | *.lnk
19 | 
20 | # =========================
21 | # Operating System Files
22 | # =========================
23 | 
24 | # OSX
25 | # =========================
26 | 
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 | 
31 | # Thumbnails
32 | ._*
33 | 
34 | # Files that might appear in the root of a volume
35 | .DocumentRevisions-V100
36 | .fseventsd
37 | .Spotlight-V100
38 | .TemporaryItems
39 | .Trashes
40 | .VolumeIcon.icns
41 | 
42 | # Directories potentially created on remote AFP share
43 | .AppleDB
44 | .AppleDesktop
45 | Network Trash Folder
46 | Temporary Items
47 | .apdisk
48 | 
--------------------------------------------------------------------------------
/Hadoop/README.md:
--------------------------------------------------------------------------------
 1 | This directory mainly hosts my code for the two Hadoop sub-systems, HDFS and MapReduce. The code quality is not always high, but I try my best to write each piece of code well.
 2 | 
 3 | An example of wrapping HDFS operations in Java; the corresponding blog post: http://blog.csdn.net/gamer_gyt/article/details/50985606
 4 | 
--------------------------------------------------------------------------------
/Hadoop/WebKPI/KPIJob.java:
--------------------------------------------------------------------------------
 1 | package WebKPI;
 2 | 
 3 | import java.io.IOException;
 4 | import java.net.URISyntaxException;
 5 | import java.util.HashMap;
 6 | import java.util.Map;
 7 | 
 8 | public class KPIJob {
 9 |     //global constant: the HDFS address URL
10 |     public static final String HDFS = "hdfs://127.0.0.1:9000";
11 | 
12 |     public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
13 |         //a map holding every path the program needs
14 |         Map<String, String> path = new HashMap<String, String>();
15 | 
16 |         // path.put("local_path", "webLogKPI/weblog/access.log");          //local directory
17 |         path.put("input_log", HDFS+"/mr/webLogKPI/log_files");              //directory on HDFS holding the log files
18 | 
19 |         path.put("output_oneip", HDFS + "/mr/webLogKPI/KPI_OneIP_Sum");     //HDFS output directory for KPI_OneIP_Sum
20 |         path.put("output_pv", HDFS + "/mr/webLogKPI/KPI_OnePV_Sum");        //HDFS output directory for KPI_OnePV_Sum
21 |         path.put("output_request",HDFS+"/mr/webLogKPI/KPI_OneRequest_Sum"); //HDFS output directory for KPI_OneRequest_Sum
22 |         path.put("output_time", HDFS+"/mr/webLogKPI/KPI_OneTime_Sum");      //HDFS output directory for KPI_OneTime_Sum
23 |         path.put("output_source", HDFS+"/mr/webLogKPI/KPI_OneResource_Sum"); //HDFS output directory for KPI_OneResource_Sum
24 | 
25 |         KPI_OneIP_Sum.main(path);      //unique IP visits
26 |         KPI_OnePV_Sum.main(path);      //page views (PV)
27 |         KPI_OneRequest_Sum.main(path); //request methods
28 |         KPI_OneTime_Sum.main(path);    //PV per hour
29 |         KPI_OneSource_Sum.main(path);  //daily user-agent (device) statistics
30 | 
31 |         System.exit(0);
32 |     }
33 | }
34 | 
--------------------------------------------------------------------------------
/Hadoop/WebKPI/KPI_OnePV_Sum.java:
--------------------------------------------------------------------------------
 1 | package WebKPI;
 2 | 
 3 | import java.io.IOException;
 4 | import java.net.URISyntaxException;
 5 | import java.text.ParseException;
 6 | import java.util.Map;
 7 | 
 8 | import org.apache.hadoop.conf.Configuration;
 9 | import org.apache.hadoop.fs.Path;
10 | import org.apache.hadoop.io.LongWritable;
11 | import org.apache.hadoop.io.Text;
12 | import org.apache.hadoop.mapreduce.InputSplit;
13 | import org.apache.hadoop.mapreduce.Job;
14 | import org.apache.hadoop.mapreduce.Mapper;
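// KPI_OnePV_Sum: counts page views (PV) per requested URL and per day from the parsed
// access-log records; MultipleOutputs writes each day's counts to its own named output
// file (e.g. 26Apr2016-r-00000).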
15 | import org.apache.hadoop.mapreduce.Reducer; 16 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 18 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 20 | import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; 21 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 22 | 23 | public class KPI_OnePV_Sum { 24 | 25 | private static KPIfilter kpi; //声明一个KPIfilter对象 26 | 27 | //Mapper类 28 | public static class PVMap extends Mapper{ 29 | 30 | private static String filename ;//整个Map函数使用这个变量,意思为 获取当前文件的名称 31 | private static Text pvK1 = new Text(); 32 | private static LongWritable pvV1 = new LongWritable(1); 33 | 34 | //获取文件名,setup函数,每次执行一个Map类时只调用一次 35 | @Override 36 | protected void setup(Context context) throws IOException,InterruptedException { 37 | // TODO Auto-generated method stub 38 | InputSplit input = context.getInputSplit(); 39 | filename = ((FileSplit) input).getPath().getName(); //获得的是形如 26-Apr-2016.txt 40 | filename = filename.substring(0, 11).replace("-",""); //转换为: 26Apr2016 41 | System.out.println("filename:" + filename); 42 | } 43 | 44 | public void map(LongWritable key, Text value ,Context context) throws IOException, InterruptedException{ 45 | try { 46 | kpi = KPIfilter.filterPVs(value.toString()); 47 | if(kpi.isValid()) 48 | { 49 | pvK1.set(kpi.getSee_url() + "\t" + filename); //key设置为从log中解析出的访问入口 50 | context.write(pvK1, pvV1); 51 | } 52 | } catch (ParseException e) { 53 | // TODO Auto-generated catch block 54 | // e.printStackTrace(); 55 | // System.out.println("This is some error"); 56 | } 57 | } 58 | 59 | } 60 | //Reducer类 61 | public static class PVReduce extends Reducer{ 62 | 63 | private static Text pvk2 = new Text(); //key 64 | private static LongWritable pvV2 = new LongWritable(); //value 65 | 66 | //声明mos变量,将不同日期的处理结果写进不同的文件 67 | private MultipleOutputs mos; 68 | 69 | //reduce类中的setup函数 70 | @Override 71 | protected void setup(org.apache.hadoop.mapreduce.Reducer.Context context) throws IOException, InterruptedException { 72 | // TODO Auto-generated method stub 73 | mos = new MultipleOutputs(context); 74 | } 75 | 76 | public void reduce(Text key, Iterable values, Context contexts) throws IOException, InterruptedException{ 77 | String[] arr = key.toString().split("\t"); 78 | String filename = arr[1]; 79 | 80 | //统计每个指定页面的日访问量 81 | int seeNum = 0; 82 | for(LongWritable w : values) 83 | { 84 | seeNum += w.get(); 85 | } 86 | 87 | pvk2.set(arr[0]); 88 | pvV2.set(seeNum); 89 | // System.out.println(filename + "______________" + pvk2 + "===========" + pvV2); 90 | mos.write(filename, pvk2, pvV2); 91 | } 92 | 93 | //cleanup函数 关闭mos 94 | public void cleanup(Context context) throws IOException,InterruptedException { 95 | mos.close(); 96 | } 97 | } 98 | 99 | //main函数 100 | public static void main(Map path) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException { 101 | String hdfs_input = path.get("input_log"); //loghdfs存放 102 | String hdfs_output = path.get("output_pv"); //pv输出的目录 103 | 104 | hdfsGYT hdfs = new hdfsGYT(); 105 | hdfs.rmr(hdfs_output); //如果存在输出的目录的首先删除,否则会报错 106 | 107 | Job job = new Job(new Configuration(), "PV"); 108 | job.setJarByClass(KPI_OnePV_Sum.class); 109 | 110 | job.setMapperClass(PVMap.class); 111 | job.setReducerClass(PVReduce.class); 112 | 113 | job.setMapOutputKeyClass(Text.class); 114 | job.setMapOutputValueClass(LongWritable.class); 115 | 
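        // The fourteen MultipleOutputs.addNamedOutput calls further below register one named
        // output per log-file date (17Apr2016 .. 30Apr2016).  A minimal sketch of an equivalent
        // loop, assuming that same fixed date range, would be:
        //
        //     for (int day = 17; day <= 30; day++) {
        //         MultipleOutputs.addNamedOutput(job, day + "Apr2016",
        //                 TextOutputFormat.class, Text.class, LongWritable.class);
        //     }
        //
        // The sketch stays inside a comment because addNamedOutput rejects a name that is
        // registered twice, and mos.write(filename, ...) in the reducer only accepts dates
        // registered here.  Note also that the System.out.println(kpi...) lines at the end of
        // main() read the static kpi field that is set in the mapper; that only works when the
        // map tasks run in the same JVM as this driver (local job runner), otherwise kpi is
        // still null at that point.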
116 | job.setOutputKeyClass(Text.class); 117 | job.setOutputValueClass(LongWritable.class); 118 | 119 | job.setInputFormatClass(TextInputFormat.class); 120 | job.setOutputFormatClass(TextOutputFormat.class); 121 | 122 | MultipleOutputs.addNamedOutput(job, "17Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 123 | MultipleOutputs.addNamedOutput(job, "18Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 124 | MultipleOutputs.addNamedOutput(job, "19Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 125 | MultipleOutputs.addNamedOutput(job, "20Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 126 | MultipleOutputs.addNamedOutput(job, "21Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 127 | MultipleOutputs.addNamedOutput(job, "22Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 128 | MultipleOutputs.addNamedOutput(job, "23Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 129 | MultipleOutputs.addNamedOutput(job, "24Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 130 | MultipleOutputs.addNamedOutput(job, "25Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 131 | MultipleOutputs.addNamedOutput(job, "26Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 132 | MultipleOutputs.addNamedOutput(job, "27Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 133 | MultipleOutputs.addNamedOutput(job, "28Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 134 | MultipleOutputs.addNamedOutput(job, "29Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 135 | MultipleOutputs.addNamedOutput(job, "30Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 136 | 137 | FileInputFormat.addInputPath(job, new Path(hdfs_input)); 138 | FileOutputFormat.setOutputPath(job, new Path(hdfs_output)); 139 | 140 | //提交作业 141 | job.waitForCompletion(true); 142 | 143 | // 144 | System.out.println("User_agent Error:" + kpi.getNumUser_agent()); 145 | System.out.println("Status Error:" + kpi.getStatus()); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /Hadoop/WebKPI/KPI_OneRequest_Sum.java: -------------------------------------------------------------------------------- 1 | package WebKPI; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.text.ParseException; 6 | import java.util.Map; 7 | 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.Path; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.io.Text; 12 | import org.apache.hadoop.mapreduce.InputSplit; 13 | import org.apache.hadoop.mapreduce.Job; 14 | import org.apache.hadoop.mapreduce.Mapper; 15 | import org.apache.hadoop.mapreduce.Reducer; 16 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 18 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 20 | import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; 21 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 22 | 23 | public class KPI_OneRequest_Sum { 24 | 25 | private static KPIfilter kpi; //声明一个kpi对象 26 | //Mapper类 27 | public static class ReMap extends Mapper 28 | { 29 | String filename; //读取的文件名 30 | static Text reK1 = new Text(); 31 | static LongWritable reV1 = new 
LongWritable(1); 32 | 33 | //setup函数,没个Map执行一次 34 | @Override 35 | protected void setup(Context context)throws IOException, InterruptedException { 36 | // TODO Auto-generated method stub 37 | InputSplit split = context.getInputSplit(); 38 | filename = ((FileSplit) split).getPath().getName(); 39 | filename = filename.substring(0, 11).replace("-", ""); //得到合法的文件名 40 | System.out.println("filename: " + filename); 41 | } 42 | //map函数 43 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException 44 | { 45 | try { 46 | kpi = KPIfilter.parser(value.toString()); 47 | if(kpi.isValid()) 48 | { 49 | reK1.set(kpi.getRequest()+"\t"+filename); 50 | context.write(reK1, reV1); 51 | } 52 | } catch (ParseException e) { 53 | // TODO Auto-generated catch block 54 | e.printStackTrace(); 55 | } 56 | } 57 | } 58 | //Reducer类 59 | public static class ReReduce extends Reducer 60 | { 61 | private static Text reK2 = new Text(); //key 62 | private static LongWritable reV2 = new LongWritable(); //value 63 | 64 | private MultipleOutputs mos; //声明多路输出 65 | //setup函数 66 | @Override 67 | protected void setup(Context context) throws IOException, InterruptedException { 68 | mos = new MultipleOutputs(context); 69 | } 70 | //reduce函数 71 | public void reduce(Text key, Iterable values, Context contexts) throws IOException, InterruptedException 72 | { 73 | int sum=0; 74 | String[] arr = key.toString().split("\t"); 75 | for (LongWritable w : values) { 76 | sum += w.get(); 77 | } 78 | reK2.set(arr[0]); 79 | reV2.set(sum); 80 | mos.write(arr[1], reK2, reV2); 81 | } 82 | //cleanup函数 83 | @Override 84 | protected void cleanup(Context context) throws IOException, InterruptedException { 85 | // TODO Auto-generated method stub 86 | mos.close(); 87 | } 88 | } 89 | 90 | public static void main(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 91 | // TODO Auto-generated method stub 92 | 93 | String hdfs_input = path.get("input_log"); //指定输入输出文件夹 94 | String hdfs_output = path.get("output_request"); 95 | 96 | hdfsGYT hdfs = new hdfsGYT(); 97 | hdfs.rmr(hdfs_output); //首先删除对应的hdfs上的文件输出目录 98 | 99 | Job job = new Job(new Configuration(), "RequestSum"); 100 | job.setJarByClass(KPI_OneRequest_Sum.class); 101 | 102 | job.setMapperClass(ReMap.class); 103 | job.setReducerClass(ReReduce.class); 104 | 105 | job.setMapOutputKeyClass(Text.class); 106 | job.setMapOutputValueClass(LongWritable.class); 107 | 108 | job.setOutputKeyClass(Text.class); 109 | job.setOutputValueClass(LongWritable.class); 110 | 111 | job.setInputFormatClass(TextInputFormat.class); 112 | job.setOutputFormatClass(TextOutputFormat.class); 113 | 114 | MultipleOutputs.addNamedOutput(job, "17Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 115 | MultipleOutputs.addNamedOutput(job, "18Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 116 | MultipleOutputs.addNamedOutput(job, "19Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 117 | MultipleOutputs.addNamedOutput(job, "20Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 118 | MultipleOutputs.addNamedOutput(job, "21Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 119 | MultipleOutputs.addNamedOutput(job, "22Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 120 | MultipleOutputs.addNamedOutput(job, "23Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 121 | MultipleOutputs.addNamedOutput(job, "24Apr2016", 
TextOutputFormat.class, Text.class, LongWritable.class); 122 | MultipleOutputs.addNamedOutput(job, "25Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 123 | MultipleOutputs.addNamedOutput(job, "26Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 124 | MultipleOutputs.addNamedOutput(job, "27Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 125 | MultipleOutputs.addNamedOutput(job, "28Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 126 | MultipleOutputs.addNamedOutput(job, "29Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 127 | MultipleOutputs.addNamedOutput(job, "30Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 128 | 129 | FileInputFormat.addInputPath(job, new Path(hdfs_input)); 130 | FileOutputFormat.setOutputPath(job, new Path(hdfs_output) ); 131 | 132 | //提交作业 133 | job.waitForCompletion(true); 134 | 135 | // 136 | System.out.println("User_agent Error:" + kpi.getNumUser_agent()); 137 | System.out.println("Status Error:" + kpi.getStatus()); 138 | } 139 | 140 | } 141 | -------------------------------------------------------------------------------- /Hadoop/WebKPI/KPI_OneSource_Sum.java: -------------------------------------------------------------------------------- 1 | package WebKPI; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.text.ParseException; 6 | import java.util.Map; 7 | 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.Path; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.io.Text; 12 | import org.apache.hadoop.mapreduce.InputSplit; 13 | import org.apache.hadoop.mapreduce.Job; 14 | import org.apache.hadoop.mapreduce.Mapper; 15 | import org.apache.hadoop.mapreduce.Reducer; 16 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 18 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 20 | import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; 21 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 22 | 23 | 24 | public class KPI_OneSource_Sum { 25 | 26 | private static KPIfilter kpi; //声明一个kpi对象 27 | 28 | public static class SourceMap extends Mapper{ 29 | 30 | static String filename; //存储文件名 31 | static Text sK1 = new Text(); //key 32 | static LongWritable sV1 = new LongWritable(1); //value 33 | 34 | @Override 35 | protected void setup(Context context) throws IOException,InterruptedException { 36 | // TODO Auto-generated method stub 37 | InputSplit split = context.getInputSplit(); 38 | filename = ((FileSplit) split).getPath().getName(); 39 | filename = filename.substring(0, 11).replace("-", ""); //得到合法的文件名 40 | System.out.println("filename: " + filename); 41 | } 42 | 43 | //map函数 44 | @Override 45 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 46 | // TODO Auto-generated method stub 47 | try { 48 | kpi = KPIfilter.parser(value.toString()); 49 | if(kpi.isValid()) 50 | { 51 | sK1.set(kpi.getUser_agent()+"\t"+filename); 52 | context.write(sK1, sV1); 53 | } 54 | } catch (ParseException e) { 55 | // TODO Auto-generated catch block 56 | e.printStackTrace(); 57 | } 58 | } 59 | 60 | } 61 | 62 | public static class SourceReduce extends Reducer< Text, LongWritable, Text , LongWritable>{ 63 | 64 | static Text sK2 = new Text(); //key 65 | static 
LongWritable sV2 = new LongWritable(); //value 66 | 67 | private MultipleOutputs mos; //声明多路输出 68 | 69 | @Override 70 | protected void setup(Context context)throws IOException, InterruptedException { 71 | // TODO Auto-generated method stub 72 | mos =new MultipleOutputs (context); 73 | } 74 | 75 | @Override 76 | protected void reduce(Text key, Iterable values,Context context) 77 | throws IOException, InterruptedException { 78 | // TODO Auto-generated method stub 79 | int sum=0; 80 | String[] arr = key.toString().split("\t"); 81 | for (LongWritable w : values) { 82 | sum += w.get(); 83 | } 84 | sK2.set(arr[0]); 85 | sV2.set(sum); 86 | System.out.println(arr[1]); 87 | mos.write(arr[1], sK2, sV2); 88 | } 89 | @Override 90 | public void cleanup(Context context)throws IOException, InterruptedException { 91 | // TODO Auto-generated method stub 92 | mos.close(); 93 | } 94 | 95 | } 96 | 97 | public static void main(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 98 | // TODO Auto-generated method stub 99 | String hdfs_input = path.get("input_log"); //指定输入输出文件夹 100 | String hdfs_output = path.get("output_source"); 101 | 102 | hdfsGYT hdfs = new hdfsGYT(); 103 | hdfs.rmr(hdfs_output); //首先删除对应的hdfs上的文件输出目录 104 | 105 | Job job = new Job(new Configuration(), "Resource"); 106 | job.setJarByClass(KPI_OneSource_Sum.class); 107 | 108 | job.setMapperClass(SourceMap.class); 109 | job.setReducerClass(SourceReduce.class); 110 | 111 | job.setMapOutputKeyClass(Text.class); 112 | job.setMapOutputValueClass(LongWritable.class); 113 | 114 | job.setOutputKeyClass(Text.class); 115 | job.setOutputValueClass(LongWritable.class); 116 | 117 | job.setInputFormatClass(TextInputFormat.class); 118 | job.setOutputFormatClass(TextOutputFormat.class); 119 | 120 | MultipleOutputs.addNamedOutput(job, "17Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 121 | MultipleOutputs.addNamedOutput(job, "18Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 122 | MultipleOutputs.addNamedOutput(job, "19Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 123 | MultipleOutputs.addNamedOutput(job, "20Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 124 | MultipleOutputs.addNamedOutput(job, "21Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 125 | MultipleOutputs.addNamedOutput(job, "22Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 126 | MultipleOutputs.addNamedOutput(job, "23Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 127 | MultipleOutputs.addNamedOutput(job, "24Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 128 | MultipleOutputs.addNamedOutput(job, "25Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 129 | MultipleOutputs.addNamedOutput(job, "26Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 130 | MultipleOutputs.addNamedOutput(job, "27Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 131 | MultipleOutputs.addNamedOutput(job, "28Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 132 | MultipleOutputs.addNamedOutput(job, "29Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 133 | MultipleOutputs.addNamedOutput(job, "30Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 134 | 135 | FileInputFormat.addInputPath(job, new Path(hdfs_input)); 136 | FileOutputFormat.setOutputPath(job, new Path(hdfs_output) ); 137 | 138 | //提交作业 139 | 
job.waitForCompletion(true); 140 | 141 | // 142 | System.out.println("User_agent Error:" + kpi.getNumUser_agent()); 143 | System.out.println("Status Error:" + kpi.getStatus()); 144 | } 145 | 146 | } 147 | -------------------------------------------------------------------------------- /Hadoop/WebKPI/KPI_OneTime_Sum.java: -------------------------------------------------------------------------------- 1 | package WebKPI; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.text.ParseException; 6 | import java.util.Map; 7 | 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.Path; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.io.Text; 12 | import org.apache.hadoop.mapreduce.InputSplit; 13 | import org.apache.hadoop.mapreduce.Job; 14 | import org.apache.hadoop.mapreduce.Mapper; 15 | import org.apache.hadoop.mapreduce.Reducer; 16 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 18 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 20 | import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; 21 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 22 | 23 | public class KPI_OneTime_Sum { 24 | 25 | private static KPIfilter kpi; //声明一个kpi对象 26 | 27 | public static class OneTimeMap extends Mapper{ 28 | String filename; //读取的文件名 29 | static Text timeK1 = new Text(); 30 | static LongWritable timeV1 = new LongWritable(1); 31 | 32 | //setup函数,没个Map执行一次 33 | @Override 34 | protected void setup(Context context) throws IOException, InterruptedException { 35 | // TODO Auto-generated method stub 36 | InputSplit split = context.getInputSplit(); 37 | filename = ((FileSplit) split).getPath().getName(); 38 | filename = filename.substring(0, 11).replace("-", ""); //得到合法的文件名 39 | System.out.println("filename: " + filename); 40 | } 41 | 42 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 43 | try { 44 | kpi = KPIfilter.filterPVs(value.toString()); 45 | if(kpi.isValid()){ 46 | timeK1.set(kpi.getTime_local_Date_hour()+"\t" + filename); 47 | context.write(timeK1,timeV1); 48 | } 49 | } catch (ParseException e) { 50 | // TODO Auto-generated catch block 51 | e.printStackTrace(); 52 | } 53 | } 54 | } 55 | 56 | public static class OneTimeReduce extends Reducer{ 57 | 58 | private static Text timeK2 = new Text(); 59 | private static LongWritable timeV2 = new LongWritable(); 60 | 61 | private MultipleOutputs mos; //声明多路输出 62 | //setup函数 63 | @Override 64 | protected void setup(Context context) throws IOException, InterruptedException { 65 | mos = new MultipleOutputs(context); 66 | } 67 | 68 | public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException{ 69 | int sum = 0; 70 | String[] arr = key.toString().split("\t"); 71 | String filename =arr[1]; 72 | for (LongWritable longWritable : values) { 73 | sum += longWritable.get(); 74 | } 75 | timeK2.set(arr[0]); 76 | timeV2.set(sum); 77 | mos.write(filename, timeK2, timeV2); 78 | } 79 | 80 | public void cleanup(Context context) throws IOException, InterruptedException{ 81 | mos.close(); 82 | } 83 | } 84 | 85 | public static void main(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 86 | // TODO Auto-generated method stub 87 | String hdfs_input = 
path.get("input_log"); //loghdfs存放 88 | String hdfs_output = path.get("output_time"); //pv输出的目录 89 | 90 | hdfsGYT hdfs = new hdfsGYT(); 91 | hdfs.rmr(hdfs_output); //如果存在输出的目录的首先删除,否则会报错 92 | 93 | Job job = new Job(new Configuration(), "OneTime"); 94 | job.setJarByClass(KPI_OnePV_Sum.class); 95 | 96 | job.setMapperClass(OneTimeMap.class); 97 | job.setReducerClass(OneTimeReduce.class); 98 | 99 | job.setMapOutputKeyClass(Text.class); 100 | job.setMapOutputValueClass(LongWritable.class); 101 | 102 | job.setOutputKeyClass(Text.class); 103 | job.setOutputValueClass(LongWritable.class); 104 | 105 | job.setInputFormatClass(TextInputFormat.class); 106 | job.setOutputFormatClass(TextOutputFormat.class); 107 | 108 | MultipleOutputs.addNamedOutput(job, "17Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 109 | MultipleOutputs.addNamedOutput(job, "18Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 110 | MultipleOutputs.addNamedOutput(job, "19Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 111 | MultipleOutputs.addNamedOutput(job, "20Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 112 | MultipleOutputs.addNamedOutput(job, "21Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 113 | MultipleOutputs.addNamedOutput(job, "22Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 114 | MultipleOutputs.addNamedOutput(job, "23Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 115 | MultipleOutputs.addNamedOutput(job, "24Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 116 | MultipleOutputs.addNamedOutput(job, "25Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 117 | MultipleOutputs.addNamedOutput(job, "26Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 118 | MultipleOutputs.addNamedOutput(job, "27Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 119 | MultipleOutputs.addNamedOutput(job, "28Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 120 | MultipleOutputs.addNamedOutput(job, "29Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 121 | MultipleOutputs.addNamedOutput(job, "30Apr2016", TextOutputFormat.class, Text.class, LongWritable.class); 122 | 123 | FileInputFormat.addInputPath(job, new Path(hdfs_input)); 124 | FileOutputFormat.setOutputPath(job, new Path(hdfs_output)); 125 | 126 | //提交作业 127 | job.waitForCompletion(true); 128 | 129 | // 130 | System.out.println("User_agent Error:" + kpi.getNumUser_agent()); 131 | System.out.println("Status Error:" + kpi.getStatus()); 132 | } 133 | 134 | } 135 | -------------------------------------------------------------------------------- /Hadoop/WebKPI/hdfsGYT.java: -------------------------------------------------------------------------------- 1 | package WebKPI; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | import java.net.URISyntaxException; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.FSDataInputStream; 9 | import org.apache.hadoop.fs.FileStatus; 10 | import org.apache.hadoop.fs.FileSystem; 11 | import org.apache.hadoop.fs.Path; 12 | import org.apache.hadoop.io.IOUtils; 13 | 14 | public class hdfsGYT { 15 | 16 | private static final String HDFS = "hdfs://127.0.0.1:9000/"; 17 | 18 | public hdfsGYT(String hdfs, Configuration conf ){ 19 | this.hdfsPath = hdfs; 20 | this.conf = conf; 21 | } 22 | 23 | public hdfsGYT() { 24 | // TODO Auto-generated constructor stub 25 | } 26 | 27 | private 
String hdfsPath; 28 | private Configuration conf = new Configuration() ; 29 | 30 | public static void main(String[] args) throws IOException, URISyntaxException{ 31 | hdfsGYT hdfsgyt = new hdfsGYT(); 32 | String folder = HDFS + "mr/groom_system/small2.csv"; 33 | String local = "/home/thinkgamer/Java/hadoop_shizhan/src/user_thing_tuijian/small2.csv"; 34 | String local1 = "/home/thinkgamer/Java/hadoop_shizhan/src/user_thing_tuijian"; 35 | //判断某个文件夹是否存在 36 | //hdfsgyt.isExist(folder); 37 | //创建文件夹 38 | //hdfsgyt.mkdir(folder); 39 | //删除文件夹 40 | //hdfsgyt.rmr(folder); 41 | //列出所有文件夹 42 | //hdfsgyt.ls(folder); 43 | //递归列出所有文件夹 44 | //hdfsgyt.lsr(folder); 45 | //上传文件 46 | //hdfsgyt.put(local, folder); 47 | //下载文件 48 | //hdfsgyt.get(folder,local1); 49 | //删除文件 50 | //hdfsgyt.rm(folder); 51 | //显示文件 52 | //hdfsgyt.cat(folder); 53 | //重命名文件 54 | // String path1 = HDFS + "mr/output"; 55 | // String path2 = HDFS + "mr/input"; 56 | // hdfsgyt.rename(path1,path2); 57 | } 58 | 59 | //重命名文件 60 | public void rename(String path1, String path2) throws IOException, URISyntaxException { 61 | // TODO Auto-generated method stub 62 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 63 | try{ 64 | fs.rename(new Path(path1), new Path(path2 ) ); 65 | System.out.println("Rename " + path1 + " To " + path2 ); 66 | }finally{ 67 | fs.close(); 68 | } 69 | } 70 | 71 | //显示文件 72 | public static void cat(String folder) throws IOException, URISyntaxException { 73 | // 与hdfs建立联系 74 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 75 | Path path = new Path(folder); 76 | FSDataInputStream fsdis = null; 77 | System.out.println("cat: " + folder); 78 | try { 79 | fsdis =fs.open(path); 80 | IOUtils.copyBytes(fsdis, System.out, 4096, false); 81 | } finally { 82 | IOUtils.closeStream(fsdis); 83 | fs.close(); 84 | } 85 | } 86 | 87 | //删除文件 88 | public static void rm(String folder) throws IOException, URISyntaxException { 89 | //与hdfs建立联系 90 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 91 | Path path = new Path(folder); 92 | if(fs.deleteOnExit(path)){ 93 | fs.delete(path); 94 | System.out.println("delete:" + folder); 95 | }else{ 96 | System.out.println("The fiel is not exist!"); 97 | } 98 | fs.close(); 99 | } 100 | 101 | //下载文件 102 | public static void get(String remote, String local) throws IllegalArgumentException, IOException, URISyntaxException { 103 | // 建立联系 104 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 105 | fs.copyToLocalFile(new Path(remote), new Path(local)); 106 | System.out.println("Get From : " + remote + " To :" + local); 107 | fs.close(); 108 | } 109 | 110 | //上传文件 111 | public static void put(String local, String remote) throws IOException, URISyntaxException { 112 | // 建立联系 113 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 114 | fs.copyFromLocalFile(new Path(local), new Path(remote)); 115 | System.out.println("Put :" + local + " To : " + remote); 116 | fs.close(); 117 | } 118 | 119 | //递归列出所有文件夹 120 | public static void lsr(String folder) throws IOException, URISyntaxException { 121 | //与hdfs建立联系 122 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 123 | Path path = new Path(folder); 124 | //得到该目录下的所有文件 125 | FileStatus[] fileList = fs.listStatus(path); 126 | for (FileStatus f : fileList) { 127 | System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen()); 128 | try{ 129 | FileStatus[] fileListR = fs.listStatus(f.getPath()); 130 | for(FileStatus 
fr:fileListR){ 131 | System.out.printf("name: %s | folder: %s | size: %d\n", fr.getPath(), fr.isDir() , fr.getLen()); 132 | } 133 | }finally{ 134 | continue; 135 | } 136 | } 137 | fs.close(); 138 | } 139 | 140 | //列出所有文件夹 141 | public static void ls(String folder) throws IOException, URISyntaxException { 142 | //与hdfs建立联系 143 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 144 | Path path = new Path(folder); 145 | //得到该目录下的所有文件 146 | FileStatus[] fileList = fs.listStatus(path); 147 | for (FileStatus f : fileList) { 148 | System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen()); 149 | } 150 | fs.close(); 151 | } 152 | 153 | //删除文件夹 154 | public static void rmr(String folder) throws IOException, URISyntaxException { 155 | //与hdfs建立联系 156 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 157 | Path path = new Path(folder); 158 | fs.delete(path); 159 | System.out.println("delete:" + folder); 160 | fs.close(); 161 | } 162 | 163 | //创建文件夹 164 | public static void mkdir(String folder) throws IOException, URISyntaxException { 165 | //与hdfs建立联系 166 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 167 | Path path = new Path(folder); 168 | if (!fs.exists(path)) { 169 | fs.mkdirs(path); 170 | System.out.println("Create: " + folder); 171 | }else{ 172 | System.out.println("it is have exist:" + folder); 173 | } 174 | fs.close(); 175 | } 176 | 177 | //判断某个文件夹是否存在 178 | public static void isExist(String folder) throws IOException, URISyntaxException { 179 | //与hdfs建立联系 180 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 181 | Path path = new Path(folder); 182 | if(fs.exists(path)){ 183 | System.out.println("it is have exist:" + folder); 184 | }else{ 185 | System.out.println("it is not exist:" + folder); 186 | } 187 | fs.close(); 188 | } 189 | 190 | } -------------------------------------------------------------------------------- /Hadoop/bookTuijian/Step1.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.Map; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.Path; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.io.Text; 11 | import org.apache.hadoop.mapreduce.InputSplit; 12 | import org.apache.hadoop.mapreduce.Job; 13 | import org.apache.hadoop.mapreduce.Mapper; 14 | import org.apache.hadoop.mapreduce.Reducer; 15 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 16 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 17 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 18 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 20 | 21 | /* 22 | * Step1:得到评分矩阵 23 | */ 24 | public class Step1 { 25 | 26 | //Map类 27 | public static class Step1_Map extends Mapper{ 28 | 29 | String filename; //存放文件名字 30 | static Text k1 = new Text(); //key 31 | static Text v1 = new Text();//value 32 | 33 | //setup函数,每次运行Map类只执行一次,获取并打印文件名 34 | @Override 35 | protected void setup(Context context) throws IOException,InterruptedException { 36 | // TODO Auto-generated method stub 37 | InputSplit inputsplit = context.getInputSplit(); 38 | filename = ((FileSplit) inputsplit).getPath().getName(); 39 | System.out.println("Filename:" + filename); 40 | } 41 | 42 | @Override 43 | protected void 
map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { 44 | // TODO Auto-generated method stub 45 | // String[] arr = value.toString().split(","); 46 | String[] arr = value.toString().split("\t"); 47 | k1.set(arr[0]); 48 | v1.set(arr[2]+":"+arr[1]); 49 | // v1.set(arr[1]+":"+arr[2]); 50 | context.write(k1, v1); 51 | } 52 | 53 | } 54 | 55 | //Reduce类 56 | public static class Step1_Reduce extends Reducer{ 57 | static Text k2 = new Text(); 58 | static Text v2 = new Text(); 59 | @Override 60 | protected void reduce(Text key, Iterable values,Context context)throws IOException, InterruptedException { 61 | // TODO Auto-generated method stub 62 | String id_score=""; 63 | for (Text text : values) { 64 | id_score += "," + text.toString(); 65 | } 66 | id_score = id_score.substring(1); 67 | k2.set(key); 68 | v2.set(id_score); 69 | context.write(k2, v2); 70 | } 71 | 72 | } 73 | 74 | public static void run(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 75 | // TODO Auto-generated method stub 76 | String local_path = path.get("local_file"); //存放文件的本地目录 77 | String hdfs_file_path = path.get("hdfs_root_file"); //hdfs上存放文件的目录 78 | String input_path = path.get("hdfs_step1_input"); //step1的输入文件目录 79 | String output_path = path.get( "hdfs_step1_output"); //step2的输出文件目录 80 | System.out.println(local_path); 81 | System.out.println(hdfs_file_path); 82 | System.out.println(input_path); 83 | System.out.println(output_path); 84 | 85 | hdfsGYT hdfs = new hdfsGYT(); //声明一个hdfs的操作对象 86 | hdfs.rmr(input_path); //若输入的文件目录存在则删除 87 | hdfs.rmr(output_path); //若输出的文件目录存放则删除 88 | hdfs.put(local_path, input_path); //将本地文件上传至hdfs 89 | 90 | Job job = new Job(new Configuration(), "BookRecommend"); 91 | job.setJarByClass(Step1.class); 92 | 93 | //设置文件路径 94 | FileInputFormat.setInputPaths(job, new Path(input_path)); 95 | FileOutputFormat.setOutputPath(job, new Path(output_path)); 96 | 97 | //设置Map和Reduce类 98 | job.setMapperClass(Step1_Map.class); 99 | job.setReducerClass(Step1_Reduce.class); 100 | 101 | //设置map的输入输出格式 102 | job.setMapOutputKeyClass(Text.class); 103 | job.setMapOutputValueClass(Text.class); 104 | 105 | //设置reduce的输入输出格式 106 | job.setOutputKeyClass(Text.class); 107 | job.setOutputValueClass(Text.class); 108 | 109 | //设置文件 110 | job.setInputFormatClass(TextInputFormat.class); 111 | job.setOutputFormatClass(TextOutputFormat.class); 112 | 113 | //提交作业 114 | job.waitForCompletion(true); 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /Hadoop/bookTuijian/Step2.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.Map; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.Path; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.io.Text; 11 | import org.apache.hadoop.mapreduce.InputSplit; 12 | import org.apache.hadoop.mapreduce.Job; 13 | import org.apache.hadoop.mapreduce.Mapper; 14 | import org.apache.hadoop.mapreduce.Reducer; 15 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 16 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 17 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 18 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 20 | 21 | public 
class Step2 { 22 | 23 | //Map 24 | public static class Step2_Map extends Mapper{ 25 | 26 | String filename; 27 | static Text k1 = new Text(); 28 | static LongWritable v1 = new LongWritable(1); 29 | @Override 30 | protected void setup(Context context) throws IOException,InterruptedException { 31 | // TODO Auto-generated method stub 32 | InputSplit inputsplit = context.getInputSplit(); 33 | filename = ((FileSplit) inputsplit).getPath().getName(); 34 | System.out.println("Step2 FileNme:" + filename); 35 | } 36 | 37 | 38 | @Override 39 | protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { 40 | // TODO Auto-generated method stub 41 | String[] arrs = bookRecommend.DELIMITER.split(value.toString()); 42 | for (int i =1; i < arrs.length; i++ ) 43 | { 44 | String itemID = arrs[i].split(":")[0]; 45 | for( int j=1; j< arrs.length; j++) 46 | { 47 | String itemID2 = arrs[j].split(":")[0]; 48 | k1.set(itemID+":" + itemID2); 49 | context.write(k1,v1); 50 | } 51 | } 52 | } 53 | } 54 | 55 | public static class Step2_Reduce extends Reducer{ 56 | 57 | static LongWritable v2 = new LongWritable(); 58 | @Override 59 | protected void reduce(Text key, Iterable values,Context context)throws IOException, InterruptedException { 60 | // TODO Auto-generated method stub 61 | int num = 0; 62 | for (LongWritable text : values) { 63 | num += text.get(); 64 | } 65 | v2.set(num); 66 | context.write(key,v2); 67 | } 68 | 69 | } 70 | 71 | 72 | public static void run(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 73 | // TODO Auto-generated method stub 74 | String input_path = path.get("hdfs_step2_input"); 75 | String output_path = path.get("hdfs_step2_output"); 76 | 77 | hdfsGYT hdfs = new hdfsGYT(); 78 | hdfs.rmr(output_path); 79 | 80 | Job job = new Job(new Configuration(), "Step2"); 81 | job.setJarByClass(Step2.class); 82 | 83 | //设置文件路径 84 | FileInputFormat.setInputPaths(job, new Path(input_path)); 85 | FileOutputFormat.setOutputPath(job, new Path(output_path)); 86 | 87 | //设置Map和Reduce类 88 | job.setMapperClass(Step2_Map.class); 89 | job.setReducerClass(Step2_Reduce.class); 90 | 91 | //设置map的输出格式 92 | job.setMapOutputKeyClass(Text.class); 93 | job.setMapOutputValueClass(LongWritable.class); 94 | 95 | //设置reduce的输出格式 96 | job.setOutputKeyClass(Text.class); 97 | job.setOutputValueClass(LongWritable.class); 98 | 99 | //设置文件 100 | job.setInputFormatClass(TextInputFormat.class); 101 | job.setOutputFormatClass(TextOutputFormat.class); 102 | 103 | //提交作业 104 | job.waitForCompletion(true); 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /Hadoop/bookTuijian/Step3_1.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.Map; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.Path; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.io.Text; 11 | import org.apache.hadoop.mapreduce.InputSplit; 12 | import org.apache.hadoop.mapreduce.Job; 13 | import org.apache.hadoop.mapreduce.Mapper; 14 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 15 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 16 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 18 | import 
org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 19 | 20 | public class Step3_1 { 21 | 22 | public static class Step3_1_Map extends Mapper{ 23 | String filename; 24 | 25 | static Text k1 = new Text(); 26 | static Text v1 = new Text(); 27 | @Override 28 | protected void setup(Context context) throws IOException,InterruptedException { 29 | // TODO Auto-generated method stub 30 | InputSplit inputsplit = context.getInputSplit(); 31 | filename = ((FileSplit) inputsplit).getPath().getName(); 32 | System.out.println("Step2 FileNme:" + filename); 33 | } 34 | 35 | @Override 36 | protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { 37 | // TODO Auto-generated method stub 38 | String[] arrs = bookRecommend.DELIMITER.split(value.toString()); 39 | for(int i=1; i< arrs.length; i++) 40 | { 41 | String itemID = arrs[i].split(":")[0]; 42 | String score = arrs[i].split(":")[1]; 43 | k1.set(itemID); 44 | v1.set(arrs[0] + ":" + score); 45 | context.write(k1,v1); 46 | } 47 | } 48 | } 49 | public static void run(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 50 | // TODO Auto-generated method stub 51 | String input_path = path.get("hdfs_step3_1_input"); 52 | String output_path = path.get("hdfs_step3_1_output"); 53 | 54 | hdfsGYT hdfs = new hdfsGYT(); 55 | hdfs.rmr(output_path); 56 | 57 | Job job = new Job(new Configuration(), "Step3_1"); 58 | job.setJarByClass(Step2.class); 59 | 60 | //设置文件路径 61 | FileInputFormat.setInputPaths(job, new Path(input_path)); 62 | FileOutputFormat.setOutputPath(job, new Path(output_path)); 63 | 64 | //设置Map和Reduce类 65 | job.setMapperClass(Step3_1_Map.class); 66 | 67 | //设置map的输出格式 68 | job.setMapOutputKeyClass(Text.class); 69 | job.setMapOutputValueClass(Text.class); 70 | 71 | //设置文件 72 | job.setInputFormatClass(TextInputFormat.class); 73 | job.setOutputFormatClass(TextOutputFormat.class); 74 | 75 | //提交作业 76 | job.waitForCompletion(true); 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /Hadoop/bookTuijian/Step3_2.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.Map; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.Path; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.io.Text; 11 | import org.apache.hadoop.mapreduce.InputSplit; 12 | import org.apache.hadoop.mapreduce.Job; 13 | import org.apache.hadoop.mapreduce.Mapper; 14 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 15 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 16 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 18 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 19 | 20 | public class Step3_2 { 21 | 22 | public static class Step3_2_Map extends Mapper{ 23 | 24 | String filename; 25 | static Text k1 = new Text(); 26 | static LongWritable v1 = new LongWritable(); 27 | 28 | @Override 29 | protected void setup(Context context) throws IOException,InterruptedException { 30 | // TODO Auto-generated method stub 31 | InputSplit inputsplit = context.getInputSplit(); 32 | filename = ((FileSplit) inputsplit).getPath().getName(); 33 | System.out.println("Step3_2 FileName : " + filename); 34 | } 35 | 36 | @Override 37 | 
protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { 38 | // TODO Auto-generated method stub 39 | //101:101 5 40 | String[] arrs = bookRecommend.DELIMITER.split(value.toString()); 41 | k1.set(arrs[0]); 42 | v1.set(Integer.parseInt( arrs[1]) ); 43 | context.write(k1, v1); 44 | } 45 | } 46 | 47 | public static void run(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 48 | // TODO Auto-generated method stub 49 | String input_path = path.get("hdfs_step3_2_input"); 50 | String output_path = path.get("hdfs_step3_2_output"); 51 | 52 | hdfsGYT hdfs = new hdfsGYT(); 53 | hdfs.rmr(output_path); 54 | 55 | Job job = new Job(new Configuration(), "Step2"); 56 | job.setJarByClass(Step2.class); 57 | 58 | //设置文件路径 59 | FileInputFormat.setInputPaths(job, new Path(input_path)); 60 | FileOutputFormat.setOutputPath(job, new Path(output_path)); 61 | 62 | //设置Map和Reduce类 63 | job.setMapperClass(Step3_2_Map.class); 64 | 65 | //设置map的输出格式 66 | job.setMapOutputKeyClass(Text.class); 67 | job.setMapOutputValueClass(LongWritable.class); 68 | 69 | //设置文件 70 | job.setInputFormatClass(TextInputFormat.class); 71 | job.setOutputFormatClass(TextOutputFormat.class); 72 | 73 | //提交作业 74 | job.waitForCompletion(true); 75 | 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /Hadoop/bookTuijian/Step4.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.ArrayList; 6 | import java.util.HashMap; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | import org.apache.hadoop.conf.Configuration; 12 | import org.apache.hadoop.fs.Path; 13 | import org.apache.hadoop.io.LongWritable; 14 | import org.apache.hadoop.io.Text; 15 | import org.apache.hadoop.mapreduce.InputSplit; 16 | import org.apache.hadoop.mapreduce.Job; 17 | import org.apache.hadoop.mapreduce.Mapper; 18 | import org.apache.hadoop.mapreduce.Reducer; 19 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 20 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 21 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 22 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 23 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 24 | 25 | public class Step4 { 26 | 27 | public static class Step4_Map extends Mapper{ 28 | 29 | String filename; 30 | static Text k1 =new Text(); 31 | static Text value1 = new Text(); 32 | 33 | private final static Map> coocurenceMatrix = new HashMap>(); 34 | 35 | @Override 36 | protected void setup(Context context) throws IOException,InterruptedException { 37 | // TODO Auto-generated method stub 38 | InputSplit inputsplit = context.getInputSplit(); 39 | filename = ((FileSplit) inputsplit).getPath().getName(); 40 | System.out.println("Step4 Filename : " + filename); 41 | } 42 | 43 | @Override 44 | protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { 45 | // TODO Auto-generated method stub 46 | String[] arrs = bookRecommend.DELIMITER.split(value.toString()); 47 | // System.out.println(value.toString() + "=================="); 48 | 49 | String [] v1 = arrs[0].split(":"); 50 | String [] v2 = arrs[1].split(":"); 51 | 52 | if(v1.length>1) //数据来自同现矩阵 53 | { 54 | // 
System.out.println(value.toString()+"++++++++++++++++++++++++++=="); 55 | int itemID1 = Integer.parseInt(v1[0]); 56 | int itemID2 = Integer.parseInt(v1[1]); 57 | int num = Integer.parseInt(arrs[1]); 58 | 59 | List list = null; 60 | if(!coocurenceMatrix.containsKey(itemID1)){ 61 | list = new ArrayList(); 62 | }else{ 63 | list = coocurenceMatrix.get(itemID1 ); 64 | } 65 | list.add(new Coocurence(itemID1, itemID2, num) ); 66 | coocurenceMatrix.put(itemID1, list); 67 | } 68 | if(v2.length>1) //数据来自评分矩阵 69 | { 70 | System.out.println(value.toString()+"-------------------------------"); 71 | int itemID = Integer.parseInt(arrs[0]); 72 | String userID = v2[0]; 73 | double score = Float.parseFloat(v2[1]); 74 | k1.set(userID); 75 | for(Coocurence co : coocurenceMatrix.get(itemID)) 76 | { 77 | value1.set(co.getItemID2() + "," + score * co.getNum()); 78 | context.write(k1, value1); 79 | //itemID1, itemID2 +"," + score * num 80 | } 81 | } 82 | } 83 | } 84 | 85 | public static class Step4_Reduce extends Reducer{ 86 | 87 | private static Text value2 = new Text(); 88 | 89 | @Override 90 | protected void reduce(Text key, Iterable values,Context context) throws IOException, InterruptedException { 91 | // TODO Auto-generated method stub 92 | Map result = new HashMap(); 93 | for (Text text : values) { 94 | String[] arrs =text.toString().split(","); 95 | if (result.containsKey(arrs[0])) 96 | { 97 | result.put(arrs[0], result.get(arrs[0]) + Double.parseDouble(arrs[1])); 98 | }else 99 | { 100 | result.put(arrs[0], Double.parseDouble(arrs[1])); 101 | } 102 | } 103 | Iterator iter = result.keySet().iterator(); 104 | while(iter.hasNext()) 105 | { 106 | String itemID = (String) iter.next(); 107 | double score = result.get(itemID); 108 | value2.set(itemID + "," + score); 109 | context.write(key, value2); 110 | } 111 | } 112 | } 113 | 114 | public static void run(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 115 | // TODO Auto-generated method stub 116 | String input_1 = path.get("hdfs_step4_input_1"); 117 | String input_2 = path.get("hdfs_step4_input_2"); 118 | String output = path.get("hdfs_step4_output"); 119 | 120 | hdfsGYT hdfs = new hdfsGYT(); 121 | hdfs.rmr(output); 122 | 123 | Job job = new Job( new Configuration(), "Step4"); 124 | job.setJarByClass(Step4.class); 125 | 126 | //设置文件路径 127 | FileInputFormat.setInputPaths(job, new Path(input_2),new Path(input_1)); 128 | FileOutputFormat.setOutputPath(job, new Path(output)); 129 | 130 | //设置Map和Reduce类 131 | job.setMapperClass(Step4_Map.class); 132 | job.setReducerClass(Step4_Reduce.class); 133 | 134 | //设置map的输入输出格式 135 | job.setMapOutputKeyClass(Text.class); 136 | job.setMapOutputValueClass(Text.class); 137 | 138 | //设置reduce的输入输出格式 139 | job.setOutputKeyClass(Text.class); 140 | job.setOutputValueClass(Text.class); 141 | 142 | //设置文件 143 | job.setInputFormatClass(TextInputFormat.class); 144 | job.setOutputFormatClass(TextOutputFormat.class); 145 | 146 | //提交作业 147 | job.waitForCompletion(true); 148 | } 149 | } 150 | 151 | class Coocurence{ 152 | private int itemID1; 153 | private int itemID2; 154 | private int num; 155 | 156 | public Coocurence(int itemID1, int itemID2, int num){ 157 | this.itemID1 = itemID1; 158 | this.itemID2 = itemID2; 159 | this.num = num; 160 | } 161 | 162 | public int getItemID1() { 163 | return itemID1; 164 | } 165 | public void setItemID1(int itemID1) { 166 | this.itemID1 = itemID1; 167 | } 168 | public int getItemID2() { 169 | return itemID2; 170 | } 171 | public void setItemID2(int 
itemID2) { 172 | this.itemID2 = itemID2; 173 | } 174 | public int getNum() { 175 | return num; 176 | } 177 | public void setNum(int num) { 178 | this.num = num; 179 | } 180 | } -------------------------------------------------------------------------------- /Hadoop/bookTuijian/Step4_Updata.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.HashMap; 6 | import java.util.Iterator; 7 | import java.util.Map; 8 | 9 | import org.apache.hadoop.conf.Configuration; 10 | import org.apache.hadoop.fs.Path; 11 | import org.apache.hadoop.io.LongWritable; 12 | import org.apache.hadoop.io.Text; 13 | import org.apache.hadoop.mapreduce.InputSplit; 14 | import org.apache.hadoop.mapreduce.Job; 15 | import org.apache.hadoop.mapreduce.Mapper; 16 | import org.apache.hadoop.mapreduce.Reducer; 17 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 18 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 19 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 20 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 21 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 22 | 23 | /* 24 | * 是对Step4的优化,分为矩阵相乘和相加,这一步是相乘 25 | */ 26 | public class Step4_Updata { 27 | 28 | public static class Step4_Updata_Map extends Mapper< LongWritable, Text, Text, Text>{ 29 | 30 | String filename; 31 | @Override 32 | protected void setup(Context context) throws IOException,InterruptedException { 33 | // TODO Auto-generated method stub 34 | InputSplit input = context.getInputSplit(); 35 | filename = ((FileSplit) input).getPath().getParent().getName(); 36 | System.out.println("FileName:" +filename); 37 | } 38 | 39 | @Override 40 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 41 | // TODO Auto-generated method stub 42 | String[] tokens = bookRecommend.DELIMITER.split(value.toString()); //切分 43 | 44 | if(filename.equals("Step3_2") ){ //同现矩阵 45 | String[] v1 = tokens[0].split(":"); 46 | String itemID1 = v1[0]; 47 | String itemID2 = v1[1]; 48 | String num = tokens[1]; 49 | 50 | Text key1 = new Text(itemID1); 51 | Text value1 = new Text("A:" + itemID2 +"," +num); 52 | context.write(key1,value1); 53 | // System.out.println(key1.toString() + "\t" + value1.toString()); 54 | }else{ //评分矩阵 55 | String[] v2 = tokens[1].split(":"); 56 | String itemID = tokens[0]; 57 | String userID = v2[0]; 58 | String score = v2[1]; 59 | 60 | Text key1 = new Text(itemID); 61 | Text value1 = new Text("B:" + userID + "," + score); 62 | context.write(key1,value1); 63 | // System.out.println(key1.toString() + "\t" + value1.toString()); 64 | } 65 | } 66 | } 67 | 68 | public static class Step4_Updata_Reduce extends Reducer{ 69 | 70 | @Override 71 | protected void reduce(Text key, Iterable values,Context context) throws IOException, InterruptedException { 72 | // TODO Auto-generated method stub 73 | // System.out.println(key.toString()+ ":"); 74 | 75 | Map mapA = new HashMap(); 76 | Map mapB = new HashMap(); 77 | 78 | for(Text line : values){ 79 | String val = line.toString(); 80 | // System.out.println(val); 81 | if(val.startsWith("A")){ 82 | String[] kv = bookRecommend.DELIMITER.split(val.substring(2)); 83 | mapA.put(kv[0], kv[1]); //ItemID, num 84 | // System.out.println(kv[0] + "\t" + kv[1] + "--------------1"); 85 | }else if(val.startsWith("B")){ 86 | String[] kv = 
bookRecommend.DELIMITER.split(val.substring(2)); 87 | mapB.put(kv[0], kv[1]); //userID, score 88 | // System.out.println(kv[0] + "\t" + kv[1] + "--------------2"); 89 | } 90 | } 91 | 92 | double result = 0; 93 | Iterator iterA = mapA.keySet().iterator(); 94 | while(iterA.hasNext()){ 95 | String mapkA = (String) iterA.next(); //itemID 96 | int num = Integer.parseInt((String) mapA.get(mapkA)); // num 97 | Iterator iterB = mapB.keySet().iterator(); 98 | while(iterB.hasNext()){ 99 | String mapkB = (String)iterB.next(); //UserID 100 | double score = Double.parseDouble((String) mapB.get(mapkB)); //score 101 | result = num * score; //矩阵乘法结果 102 | 103 | Text key2 = new Text(mapkB); 104 | Text value2 = new Text(mapkA + "," +result); 105 | context.write(key2,value2); //userID \t itemID,result 106 | // System.out.println(key2.toString() + "\t" + value2.toString()); 107 | } 108 | } 109 | } 110 | 111 | } 112 | 113 | public static void run(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 114 | // TODO Auto-generated method stub 115 | 116 | String input_1 = path.get("hdfs_step4_updata_input"); 117 | String input_2 = path.get("hdfs_step4_updata2_input"); 118 | String output = path.get("hdfs_step4_updata_output"); 119 | 120 | hdfsGYT hdfs = new hdfsGYT(); 121 | hdfs.rmr(output); 122 | 123 | Job job = new Job(new Configuration(), "Step4_updata"); 124 | job.setJarByClass(Step4_Updata.class); 125 | //设置文件输入输出路径 126 | FileInputFormat.setInputPaths(job, new Path(input_1),new Path(input_2)); 127 | FileOutputFormat.setOutputPath(job, new Path(output)); 128 | 129 | //设置map和reduce类 130 | job.setMapperClass(Step4_Updata_Map.class); 131 | job.setReducerClass(Step4_Updata_Reduce.class); 132 | 133 | //设置Map输出 134 | job.setMapOutputKeyClass(Text.class); 135 | job.setMapOutputValueClass(Text.class); 136 | 137 | //设置Reduce输出 138 | job.setOutputKeyClass(Text.class); 139 | job.setOutputValueClass(Text.class); 140 | 141 | //设置文件输入输出 142 | job.setInputFormatClass(TextInputFormat.class); 143 | job.setOutputFormatClass(TextOutputFormat.class); 144 | 145 | job.waitForCompletion(true); 146 | } 147 | 148 | } 149 | -------------------------------------------------------------------------------- /Hadoop/bookTuijian/Step4_Updata2.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.HashMap; 6 | import java.util.Iterator; 7 | import java.util.Map; 8 | 9 | import org.apache.hadoop.conf.Configuration; 10 | import org.apache.hadoop.fs.Path; 11 | import org.apache.hadoop.io.LongWritable; 12 | import org.apache.hadoop.io.Text; 13 | import org.apache.hadoop.mapreduce.Job; 14 | import org.apache.hadoop.mapreduce.Mapper; 15 | import org.apache.hadoop.mapreduce.Reducer; 16 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 18 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 20 | 21 | 22 | public class Step4_Updata2 { 23 | 24 | public static class Step4_Updata2_Map extends Mapper{ 25 | 26 | @Override 27 | protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { 28 | // TODO Auto-generated method stub 29 | String[] tokens = bookRecommend.DELIMITER.split(value.toString()); 30 | Text key1 = new Text(tokens[0]);//userID 31 | Text 
value1 = new Text(tokens[1] + "," + tokens[2]); 32 | context.write(key1, value1); //itemID,result 33 | } 34 | 35 | } 36 | 37 | public static class Step4_Updata_Reduce extends Reducer< Text, Text, Text, Text>{ 38 | 39 | @Override 40 | protected void reduce(Text key, Iterable values, Context context)throws IOException, InterruptedException { 41 | // TODO Auto-generated method stub 42 | Map map = new HashMap(); 43 | 44 | for(Text line: values){ 45 | System.out.println(line.toString()); 46 | String[] tokens = bookRecommend.DELIMITER.split(line.toString()); 47 | String itemID = tokens[0]; 48 | Double result = Double.parseDouble(tokens[1]); 49 | 50 | if(map.containsKey(itemID)){ 51 | map.put(itemID, Double.parseDouble(map.get(itemID).toString()) + result);//矩阵乘法求和计算 52 | }else{ 53 | map.put(itemID, result); 54 | } 55 | } 56 | Iterator iter = map.keySet().iterator(); 57 | while (iter.hasNext()) { 58 | String itemID = (String) iter.next(); 59 | double score = (double) map.get(itemID); 60 | Text v = new Text(itemID + "," + score); 61 | context.write(key, v); 62 | } 63 | } 64 | 65 | } 66 | 67 | public static void run(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 68 | // TODO Auto-generated method stub 69 | String input = path.get("hdfs_step4_updata2_input"); 70 | String output = path.get("hdfs_step4_updata2_output"); 71 | 72 | hdfsGYT hdfs = new hdfsGYT(); 73 | hdfs.rmr(output); 74 | 75 | Job job = new Job(new Configuration(), "Step4_Updata2"); 76 | job.setJarByClass(Step4_Updata2.class); 77 | 78 | FileInputFormat.addInputPath(job, new Path(input)); 79 | FileOutputFormat.setOutputPath(job, new Path(output)); 80 | 81 | //设置map和reduce类 82 | job.setMapperClass(Step4_Updata2_Map.class); 83 | job.setReducerClass(Step4_Updata_Reduce.class); 84 | 85 | //设置Map输出 86 | job.setMapOutputKeyClass(Text.class); 87 | job.setMapOutputValueClass(Text.class); 88 | 89 | //设置Reduce输出 90 | job.setOutputKeyClass(Text.class); 91 | job.setOutputValueClass(Text.class); 92 | 93 | //设置文件输入输出 94 | job.setInputFormatClass(TextInputFormat.class); 95 | job.setOutputFormatClass(TextOutputFormat.class); 96 | 97 | job.waitForCompletion(true); 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /Hadoop/bookTuijian/bookRecommend.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import java.util.regex.Pattern; 8 | 9 | 10 | public class bookRecommend { 11 | 12 | /** 13 | * @param args 14 | * 驱动程序,控制所有的计算结果 15 | */ 16 | public static final String HDFS = "hdfs://127.0.0.1:9000"; 17 | public static final Pattern DELIMITER = Pattern.compile("[\t,]"); 18 | 19 | public static void main(String[] args) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 20 | // TODO Auto-generated method stub 21 | Map path = new HashMap(); 22 | path.put("local_file", "MyItems/bookTuijian/score.txt"); //本地文件所在的目录 23 | path.put("hdfs_root_file", HDFS+"/mr/bookRecommend/score"); //上传本地文件到HDFS上的存放路径 24 | 25 | path.put("hdfs_step1_input", path.get("hdfs_root_file")); //step1的输入文件存放目录 26 | path.put("hdfs_step1_output", HDFS+"/mr/bookRecommend/step1"); //hdfs上第一步运行的结果存放文件目录 27 | 28 | path.put("hdfs_step2_input", path.get("hdfs_step1_output")); //step2的输入文件目录 29 | path.put("hdfs_step2_output", HDFS+"/mr/bookRecommend/step2"); 
//step2的输出文件目录 30 | 31 | path.put("hdfs_step3_1_input", path.get("hdfs_step1_output")); //构建评分矩阵 32 | path.put("hdfs_step3_1_output", HDFS+"/mr/bookRecommend/Step3_1"); 33 | 34 | path.put("hdfs_step3_2_input", path.get("hdfs_step2_output")); //构建同现矩阵 35 | path.put("hdfs_step3_2_output", HDFS+"/mr/bookRecommend/Step3_2"); 36 | 37 | path.put("hdfs_step4_input_1", path.get("hdfs_step3_1_output")); //计算乘积 38 | path.put("hdfs_step4_input_2", path.get("hdfs_step3_2_output")); 39 | path.put("hdfs_step4_output", HDFS+"/mr/bookRecommend/result"); 40 | 41 | path.put("hdfs_step4_updata_input",path.get("hdfs_step3_1_output")); //Step4进行优化 42 | path.put("hdfs_step4_updata2_input",path.get("hdfs_step3_2_output")); 43 | path.put("hdfs_step4_updata_output", HDFS+"/mr/bookRecommend/Step4_Updata"); 44 | 45 | path.put("hdfs_step4_updata2_input",path.get("hdfs_step4_updata_output")); //Step4进行优化 46 | path.put("hdfs_step4_updata2_output", HDFS+"/mr/bookRecommend/Step4_Updata2"); 47 | 48 | 49 | // Step1.run(path); 50 | // Step2.run(path); 51 | // Step3_1.run(path); //构造评分矩阵 52 | // Step3_2.run(path); //构造同现矩阵 53 | // Step4.run(path); //计算乘积 54 | // Step4_Updata.run(path); 55 | Step4_Updata2.run(path); 56 | System.exit(0); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /Hadoop/bookTuijian/hdfsGYT.java: -------------------------------------------------------------------------------- 1 | package bookTuijian; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | import java.net.URISyntaxException; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.FSDataInputStream; 9 | import org.apache.hadoop.fs.FileStatus; 10 | import org.apache.hadoop.fs.FileSystem; 11 | import org.apache.hadoop.fs.Path; 12 | import org.apache.hadoop.io.IOUtils; 13 | 14 | public class hdfsGYT { 15 | 16 | private static final String HDFS = "hdfs://127.0.0.1:9000/"; 17 | 18 | public hdfsGYT(String hdfs, Configuration conf ){ 19 | this.hdfsPath = hdfs; 20 | this.conf = conf; 21 | } 22 | 23 | public hdfsGYT() { 24 | // TODO Auto-generated constructor stub 25 | } 26 | 27 | private String hdfsPath; 28 | private Configuration conf = new Configuration() ; 29 | 30 | public static void main(String[] args) throws IOException, URISyntaxException{ 31 | hdfsGYT hdfsgyt = new hdfsGYT(); 32 | String folder = HDFS + "mr/groom_system/small2.csv"; 33 | String local = "/home/thinkgamer/Java/hadoop_shizhan/src/user_thing_tuijian/small2.csv"; 34 | String local1 = "/home/thinkgamer/Java/hadoop_shizhan/src/user_thing_tuijian"; 35 | //判断某个文件夹是否存在 36 | //hdfsgyt.isExist(folder); 37 | //创建文件夹 38 | //hdfsgyt.mkdir(folder); 39 | //删除文件夹 40 | //hdfsgyt.rmr(folder); 41 | //列出所有文件夹 42 | //hdfsgyt.ls(folder); 43 | //递归列出所有文件夹 44 | //hdfsgyt.lsr(folder); 45 | //上传文件 46 | //hdfsgyt.put(local, folder); 47 | //下载文件 48 | //hdfsgyt.get(folder,local1); 49 | //删除文件 50 | //hdfsgyt.rm(folder); 51 | //显示文件 52 | //hdfsgyt.cat(folder); 53 | //重命名文件 54 | // String path1 = HDFS + "mr/output"; 55 | // String path2 = HDFS + "mr/input"; 56 | // hdfsgyt.rename(path1,path2); 57 | } 58 | 59 | //重命名文件 60 | public void rename(String path1, String path2) throws IOException, URISyntaxException { 61 | // TODO Auto-generated method stub 62 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 63 | try{ 64 | fs.rename(new Path(path1), new Path(path2 ) ); 65 | System.out.println("Rename " + path1 + " To " + path2 ); 66 | }finally{ 67 | fs.close(); 68 | } 69 | } 70 | 71 | //显示文件 72 | 
public static void cat(String folder) throws IOException, URISyntaxException { 73 | // 与hdfs建立联系 74 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 75 | Path path = new Path(folder); 76 | FSDataInputStream fsdis = null; 77 | System.out.println("cat: " + folder); 78 | try { 79 | fsdis =fs.open(path); 80 | IOUtils.copyBytes(fsdis, System.out, 4096, false); 81 | } finally { 82 | IOUtils.closeStream(fsdis); 83 | fs.close(); 84 | } 85 | } 86 | 87 | //删除文件 88 | public static void rm(String folder) throws IOException, URISyntaxException { 89 | //与hdfs建立联系 90 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 91 | Path path = new Path(folder); 92 | if(fs.deleteOnExit(path)){ 93 | fs.delete(path); 94 | System.out.println("delete:" + folder); 95 | }else{ 96 | System.out.println("The fiel is not exist!"); 97 | } 98 | fs.close(); 99 | } 100 | 101 | //下载文件 102 | public static void get(String remote, String local) throws IllegalArgumentException, IOException, URISyntaxException { 103 | // 建立联系 104 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 105 | fs.copyToLocalFile(new Path(remote), new Path(local)); 106 | System.out.println("Get From : " + remote + " To :" + local); 107 | fs.close(); 108 | } 109 | 110 | //上传文件 111 | public static void put(String local, String remote) throws IOException, URISyntaxException { 112 | // 建立联系 113 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 114 | fs.copyFromLocalFile(new Path(local), new Path(remote)); 115 | System.out.println("Put :" + local + " To : " + remote); 116 | fs.close(); 117 | } 118 | 119 | //递归列出所有文件夹 120 | public static void lsr(String folder) throws IOException, URISyntaxException { 121 | //与hdfs建立联系 122 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 123 | Path path = new Path(folder); 124 | //得到该目录下的所有文件 125 | FileStatus[] fileList = fs.listStatus(path); 126 | for (FileStatus f : fileList) { 127 | System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen()); 128 | try{ 129 | FileStatus[] fileListR = fs.listStatus(f.getPath()); 130 | for(FileStatus fr:fileListR){ 131 | System.out.printf("name: %s | folder: %s | size: %d\n", fr.getPath(), fr.isDir() , fr.getLen()); 132 | } 133 | }finally{ 134 | continue; 135 | } 136 | } 137 | fs.close(); 138 | } 139 | 140 | //列出所有文件夹 141 | public static void ls(String folder) throws IOException, URISyntaxException { 142 | //与hdfs建立联系 143 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 144 | Path path = new Path(folder); 145 | //得到该目录下的所有文件 146 | FileStatus[] fileList = fs.listStatus(path); 147 | for (FileStatus f : fileList) { 148 | System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen()); 149 | } 150 | fs.close(); 151 | } 152 | 153 | //删除文件夹 154 | public static void rmr(String folder) throws IOException, URISyntaxException { 155 | //与hdfs建立联系 156 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 157 | Path path = new Path(folder); 158 | fs.delete(path); 159 | System.out.println("delete:" + folder); 160 | fs.close(); 161 | } 162 | 163 | //创建文件夹 164 | public static void mkdir(String folder) throws IOException, URISyntaxException { 165 | //与hdfs建立联系 166 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 167 | Path path = new Path(folder); 168 | if (!fs.exists(path)) { 169 | fs.mkdirs(path); 170 | System.out.println("Create: " + folder); 171 | }else{ 172 | System.out.println("it is have exist:" + 
folder); 173 | } 174 | fs.close(); 175 | } 176 | 177 | //判断某个文件夹是否存在 178 | public static void isExist(String folder) throws IOException, URISyntaxException { 179 | //与hdfs建立联系 180 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 181 | Path path = new Path(folder); 182 | if(fs.exists(path)){ 183 | System.out.println("it is have exist:" + folder); 184 | }else{ 185 | System.out.println("it is not exist:" + folder); 186 | } 187 | fs.close(); 188 | } 189 | 190 | } -------------------------------------------------------------------------------- /Hadoop/bookTuijian/uid_to_bid.csv: -------------------------------------------------------------------------------- 1 | 1,101,5.0 2 | 1,102,3.0 3 | 1,103,2.5 4 | 2,101,2.0 5 | 2,102,2.5 6 | 2,103,5.0 7 | 2,104,2.0 8 | 3,101,2.5 9 | 3,104,4.0 10 | 3,105,4.5 11 | 3,107,5.0 12 | 4,101,5.0 13 | 4,103,3.0 14 | 4,104,4.5 15 | 4,106,4.0 16 | 5,101,4.0 17 | 5,102,3.0 18 | 5,103,2.0 19 | 5,104,4.0 20 | 5,105,3.5 21 | 5,106,4.0 -------------------------------------------------------------------------------- /Hadoop/hdfs/hdfsGYT.java: -------------------------------------------------------------------------------- 1 | package WebKPI; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | import java.net.URISyntaxException; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.FSDataInputStream; 9 | import org.apache.hadoop.fs.FileStatus; 10 | import org.apache.hadoop.fs.FileSystem; 11 | import org.apache.hadoop.fs.Path; 12 | import org.apache.hadoop.io.IOUtils; 13 | 14 | public class hdfsGYT { 15 | 16 | private static final String HDFS = "hdfs://127.0.0.1:9000/"; 17 | 18 | public hdfsGYT(String hdfs, Configuration conf ){ 19 | this.hdfsPath = hdfs; 20 | this.conf = conf; 21 | } 22 | 23 | public hdfsGYT() { 24 | // TODO Auto-generated constructor stub 25 | } 26 | 27 | private String hdfsPath; 28 | private Configuration conf = new Configuration() ; 29 | 30 | public static void main(String[] args) throws IOException, URISyntaxException{ 31 | hdfsGYT hdfsgyt = new hdfsGYT(); 32 | String folder = HDFS + "mr/groom_system/small2.csv"; 33 | String local = "/home/thinkgamer/Java/hadoop_shizhan/src/user_thing_tuijian/small2.csv"; 34 | String local1 = "/home/thinkgamer/Java/hadoop_shizhan/src/user_thing_tuijian"; 35 | //判断某个文件夹是否存在 36 | //hdfsgyt.isExist(folder); 37 | //创建文件夹 38 | //hdfsgyt.mkdir(folder); 39 | //删除文件夹 40 | //hdfsgyt.rmr(folder); 41 | //列出所有文件夹 42 | //hdfsgyt.ls(folder); 43 | //递归列出所有文件夹 44 | //hdfsgyt.lsr(folder); 45 | //上传文件 46 | //hdfsgyt.put(local, folder); 47 | //下载文件 48 | //hdfsgyt.get(folder,local1); 49 | //删除文件 50 | //hdfsgyt.rm(folder); 51 | //显示文件 52 | //hdfsgyt.cat(folder); 53 | //重命名文件 54 | // String path1 = HDFS + "mr/output"; 55 | // String path2 = HDFS + "mr/input"; 56 | // hdfsgyt.rename(path1,path2); 57 | } 58 | 59 | //重命名文件 60 | public void rename(String path1, String path2) throws IOException, URISyntaxException { 61 | // TODO Auto-generated method stub 62 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 63 | try{ 64 | fs.rename(new Path(path1), new Path(path2 ) ); 65 | System.out.println("Rename " + path1 + " To " + path2 ); 66 | }finally{ 67 | fs.close(); 68 | } 69 | } 70 | 71 | //显示文件 72 | public static void cat(String folder) throws IOException, URISyntaxException { 73 | // 与hdfs建立联系 74 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 75 | Path path = new Path(folder); 76 | FSDataInputStream fsdis = null; 77 | System.out.println("cat: " 
+ folder); 78 | try { 79 | fsdis =fs.open(path); 80 | IOUtils.copyBytes(fsdis, System.out, 4096, false); 81 | } finally { 82 | IOUtils.closeStream(fsdis); 83 | fs.close(); 84 | } 85 | } 86 | 87 | //删除文件 88 | public static void rm(String folder) throws IOException, URISyntaxException { 89 | //与hdfs建立联系 90 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 91 | Path path = new Path(folder); 92 | if(fs.deleteOnExit(path)){ 93 | fs.delete(path); 94 | System.out.println("delete:" + folder); 95 | }else{ 96 | System.out.println("The fiel is not exist!"); 97 | } 98 | fs.close(); 99 | } 100 | 101 | //下载文件 102 | public static void get(String remote, String local) throws IllegalArgumentException, IOException, URISyntaxException { 103 | // 建立联系 104 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 105 | fs.copyToLocalFile(new Path(remote), new Path(local)); 106 | System.out.println("Get From : " + remote + " To :" + local); 107 | fs.close(); 108 | } 109 | 110 | //上传文件 111 | public static void put(String local, String remote) throws IOException, URISyntaxException { 112 | // 建立联系 113 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 114 | fs.copyFromLocalFile(new Path(local), new Path(remote)); 115 | System.out.println("Put :" + local + " To : " + remote); 116 | fs.close(); 117 | } 118 | 119 | //递归列出所有文件夹 120 | public static void lsr(String folder) throws IOException, URISyntaxException { 121 | //与hdfs建立联系 122 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 123 | Path path = new Path(folder); 124 | //得到该目录下的所有文件 125 | FileStatus[] fileList = fs.listStatus(path); 126 | for (FileStatus f : fileList) { 127 | System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen()); 128 | try{ 129 | FileStatus[] fileListR = fs.listStatus(f.getPath()); 130 | for(FileStatus fr:fileListR){ 131 | System.out.printf("name: %s | folder: %s | size: %d\n", fr.getPath(), fr.isDir() , fr.getLen()); 132 | } 133 | }finally{ 134 | continue; 135 | } 136 | } 137 | fs.close(); 138 | } 139 | 140 | //列出所有文件夹 141 | public static void ls(String folder) throws IOException, URISyntaxException { 142 | //与hdfs建立联系 143 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 144 | Path path = new Path(folder); 145 | //得到该目录下的所有文件 146 | FileStatus[] fileList = fs.listStatus(path); 147 | for (FileStatus f : fileList) { 148 | System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen()); 149 | } 150 | fs.close(); 151 | } 152 | 153 | //删除文件夹 154 | public static void rmr(String folder) throws IOException, URISyntaxException { 155 | //与hdfs建立联系 156 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 157 | Path path = new Path(folder); 158 | fs.delete(path); 159 | System.out.println("delete:" + folder); 160 | fs.close(); 161 | } 162 | 163 | //创建文件夹 164 | public static void mkdir(String folder) throws IOException, URISyntaxException { 165 | //与hdfs建立联系 166 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 167 | Path path = new Path(folder); 168 | if (!fs.exists(path)) { 169 | fs.mkdirs(path); 170 | System.out.println("Create: " + folder); 171 | }else{ 172 | System.out.println("it is have exist:" + folder); 173 | } 174 | fs.close(); 175 | } 176 | 177 | //判断某个文件夹是否存在 178 | public static void isExist(String folder) throws IOException, URISyntaxException { 179 | //与hdfs建立联系 180 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 181 | Path path = new 
Path(folder); 182 | if(fs.exists(path)){ 183 | System.out.println("it is have exist:" + folder); 184 | }else{ 185 | System.out.println("it is not exist:" + folder); 186 | } 187 | fs.close(); 188 | } 189 | 190 | } -------------------------------------------------------------------------------- /Hadoop/multiple_In_Out/mulIn-output/part-r-00000: -------------------------------------------------------------------------------- 1 | 中国 3 2 | 中国 我们 3 | 中国 3 4 | 中国 我们 5 | 中国人 很多 6 | 中国人 很多 7 | 美国 32 8 | 美国 它们 9 | 美国 32 10 | 美国 它们 11 | -------------------------------------------------------------------------------- /Hadoop/multiple_In_Out/mulOut-output/china-r-00000: -------------------------------------------------------------------------------- 1 | 中国 3 2 | 中国 我们 3 | -------------------------------------------------------------------------------- /Hadoop/multiple_In_Out/mulOut-output/cpeople-r-00000: -------------------------------------------------------------------------------- 1 | 中国人 很多 2 | -------------------------------------------------------------------------------- /Hadoop/multiple_In_Out/mulOut-output/usa-r-00000: -------------------------------------------------------------------------------- 1 | 美国 32 2 | 美国 它们 3 | -------------------------------------------------------------------------------- /Hadoop/multiple_In_Out/multipleinout.java: -------------------------------------------------------------------------------- 1 | package multiple_In_Out; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.Mapper; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | import org.apache.hadoop.mapreduce.lib.input.MultipleInputs; 12 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 14 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 15 | 16 | public class multipleinout { 17 | 18 | static String input1 = "hdfs://127.0.0.1:9000/mr/input1"; 19 | static String input2 = "hdfs://127.0.0.1:9000/mr/input2"; 20 | static String output = "hdfs://127.0.0.1:9000/mr/output"; 21 | 22 | public static class Map extends Mapper{ 23 | private static Text k = new Text(); 24 | private static Text v = new Text(); 25 | 26 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{ 27 | String[] list = value.toString().split(","); 28 | k.set(list[0]); 29 | v.set(list[1]); 30 | context.write(k, v); 31 | } 32 | } 33 | 34 | public static class Reduce extends Reducer{ 35 | 36 | public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException{ 37 | for (Text text : values) { 38 | context.write(key, text); 39 | } 40 | } 41 | 42 | } 43 | /** 44 | * @param args 45 | * @throws IOException 46 | * @throws InterruptedException 47 | * @throws ClassNotFoundException 48 | */ 49 | public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 50 | // TODO Auto-generated method stub 51 | Job job = new Job(); 52 | job.setJarByClass(multipleoutput.class); 53 | 54 | job.setMapperClass(Map.class); 55 | job.setMapOutputKeyClass(Text.class); 56 | job.setMapOutputValueClass(Text.class); 57 | 58 | job.setReducerClass(Reduce.class); 59 | job.setOutputKeyClass(Text.class); 60 | job.setOutputValueClass(Text.class); 61 | 62 | 
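// Note on the job-wide input format set on the next line: each MultipleInputs.addInputPath
// call further down registers its own InputFormat and Mapper for its path, so (assuming the
// standard Hadoop MultipleInputs behaviour, which installs a delegating input format) the
// explicit TextInputFormat here effectively acts only as a default.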
job.setInputFormatClass(TextInputFormat.class); 63 | job.setOutputFormatClass(TextOutputFormat.class); 64 | 65 | MultipleInputs.addInputPath(job, new Path(input1), TextInputFormat.class, Map.class); 66 | MultipleInputs.addInputPath(job, new Path(input2), TextInputFormat.class, Map.class); 67 | 68 | FileOutputFormat.setOutputPath(job, new Path(output)); 69 | 70 | System.exit(job.waitForCompletion(true)?0:1); 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /Hadoop/multiple_In_Out/multipleinput_input: -------------------------------------------------------------------------------- 1 | 中国,我们 2 | 美国,它们 3 | 中国,3 4 | 美国,32 5 | 中国人,很多 6 | -------------------------------------------------------------------------------- /Hadoop/multiple_In_Out/multipleoutput.java: -------------------------------------------------------------------------------- 1 | package multiple_In_Out; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.Mapper; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 14 | import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; 15 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 16 | 17 | public class multipleoutput { 18 | 19 | static String input = "hdfs://127.0.0.1:9000/mr/input"; 20 | static String output = "hdfs://127.0.0.1:9000/mr/output"; 21 | 22 | public static class Map extends Mapper{ 23 | private static Text k = new Text(); 24 | private static Text v = new Text(); 25 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{ 26 | String[] list = value.toString().split(","); 27 | k.set(list[0]); 28 | v.set(list[1]); 29 | context.write(k, v); 30 | } 31 | } 32 | 33 | public static class Reduce extends Reducer{ 34 | private MultipleOutputs mos; 35 | public void setup(Context context){ 36 | mos = new MultipleOutputs(context); 37 | } 38 | 39 | public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException{ 40 | String k = key.toString(); 41 | for(Text t : values){ 42 | if("中国".equals(k)){ 43 | System.out.println(t.toString()); 44 | mos.write("china",new Text("中国"), t); 45 | }else if("美国".equals(k)){ 46 | System.out.println(t.toString()); 47 | mos.write("usa",new Text("美国"),t); 48 | }else if("中国人".equals(k)){ 49 | System.out.println(t.toString()); 50 | mos.write("cpeople",new Text("中国人"),t); 51 | } 52 | } 53 | } 54 | 55 | public void cleanup(Context context) throws IOException, InterruptedException{ 56 | mos.close(); 57 | } 58 | } 59 | /** 60 | * @param args 61 | * @throws IOException 62 | * @throws InterruptedException 63 | * @throws ClassNotFoundException 64 | */ 65 | public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 66 | // TODO Auto-generated method stub 67 | Job job = new Job(); 68 | job.setJarByClass(multipleoutput.class); 69 | 70 | job.setMapperClass(Map.class); 71 | job.setMapOutputKeyClass(Text.class); 72 | job.setMapOutputValueClass(Text.class); 73 | 74 | job.setReducerClass(Reduce.class); 75 | job.setOutputKeyClass(Text.class); 76 | 
job.setOutputValueClass(Text.class); 77 | 78 | job.setInputFormatClass(TextInputFormat.class); 79 | job.setOutputFormatClass(TextOutputFormat.class); 80 | 81 | MultipleOutputs.addNamedOutput(job, "china", TextOutputFormat.class, Text.class, Text.class); 82 | MultipleOutputs.addNamedOutput(job, "usa", TextOutputFormat.class, Text.class, Text.class); 83 | MultipleOutputs.addNamedOutput(job, "cpeople", TextOutputFormat.class, Text.class, Text.class); 84 | 85 | FileInputFormat.addInputPath(job, new Path(input)); 86 | FileOutputFormat.setOutputPath(job, new Path(output)); 87 | 88 | System.exit(job.waitForCompletion(true)?0:1); 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /Hadoop/multiple_In_Out/multipleoutput_input: -------------------------------------------------------------------------------- 1 | 中国,我们 2 | 美国,它们 3 | 中国,3 4 | 美国,32 5 | 中国人,很多 6 | -------------------------------------------------------------------------------- /Hadoop/pagerankjisuan/dataEtl.java: -------------------------------------------------------------------------------- 1 | package pagerankjisuan; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileNotFoundException; 6 | import java.io.FileReader; 7 | import java.io.FileWriter; 8 | import java.io.IOException; 9 | 10 | public class dataEtl { 11 | 12 | public static void main() throws IOException { 13 | 14 | File f1 = new File("MyItems/pagerankjisuan/people.csv"); 15 | if(f1.isFile()){ 16 | f1.delete(); 17 | } 18 | File f = new File("MyItems/pagerankjisuan/peoplerank.txt"); 19 | if(f.isFile()){ 20 | f.delete(); 21 | } 22 | //打开文件 23 | File file = new File("MyItems/pagerankjisuan/day7_author100_mess.csv"); 24 | //定义一个文件指针 25 | BufferedReader reader = new BufferedReader(new FileReader(file)); 26 | try { 27 | String line=null; 28 | //判断读取的一行是否为空 29 | while( (line=reader.readLine()) != null) 30 | { 31 | String[] userMess = line.split( "," ); 32 | //第一字段为id,第是个字段为粉丝列表 33 | String userid = userMess[0]; 34 | if(userMess.length!=0){ 35 | if(userMess.length==11) 36 | { 37 | int i=0; 38 | String[] focusName = userMess[10].split("\\|"); // | 为转义符 39 | for (i=1;i < focusName.length; i++) 40 | { 41 | write(userid,focusName[i]); 42 | // System.out.println(userid+ " " + focusName[i]); 43 | } 44 | } 45 | else 46 | { 47 | int j =0; 48 | String[] focusName = userMess[9].split("\\|"); // | 为转义符 49 | for (j=1;j < focusName.length; j++) 50 | { 51 | write(userid,focusName[j]); 52 | // System.out.println(userid+ " " + focusName[j]); 53 | } 54 | } 55 | } 56 | } 57 | } 58 | catch (FileNotFoundException e) { 59 | // TODO Auto-generated catch block 60 | e.printStackTrace(); 61 | } 62 | finally 63 | { 64 | reader.close(); 65 | 66 | //etl peoplerank.txt 67 | for(int i=1;i<=100;i++){ 68 | FileWriter writer = new FileWriter("MyItems/pagerankjisuan/peoplerank.txt",true); 69 | writer.write(i + "\t" + 1 + "\n"); 70 | writer.close(); 71 | } 72 | } 73 | System.out.println("OK.................."); 74 | } 75 | 76 | private static void write(String userid, String nameid) { 77 | // TODO Auto-generated method stub 78 | //定义写文件,按行写入 79 | try { 80 | if(!nameid.contains("\n")){ 81 | FileWriter writer = new FileWriter("MyItems/pagerankjisuan/people.csv",true); 82 | writer.write(userid + "," + nameid + "\n"); 83 | writer.close(); 84 | } 85 | } catch (IOException e) { 86 | // TODO Auto-generated catch block 87 | e.printStackTrace(); 88 | } 89 | } 90 | 91 | } 92 | 
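/*
 * A minimal sketch (the sample values below are hypothetical) of the records this ETL step
 * produces and that the PageRank jobs in this package consume:
 *
 *   day7_author100_mess.csv : "7,...,|12|35|88"      - comma-separated; field 0 is the user id,
 *                                                      the last field (index 10 or 9, depending
 *                                                      on record length) is the '|'-separated
 *                                                      fan list; element 0 of the split is skipped
 *   people.csv              : "7,12" / "7,35" / "7,88" - one edge per line, written by write()
 *   peoplerank.txt          : "7\t1"                  - tab-separated, initial rank 1 for each
 *                                                      of the 100 ids
 */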
-------------------------------------------------------------------------------- /Hadoop/pagerankjisuan/hdfsGYT.java: -------------------------------------------------------------------------------- 1 | package pagerankjisuan; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | import java.net.URISyntaxException; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.fs.FSDataInputStream; 9 | import org.apache.hadoop.fs.FileStatus; 10 | import org.apache.hadoop.fs.FileSystem; 11 | import org.apache.hadoop.fs.Path; 12 | import org.apache.hadoop.io.IOUtils; 13 | 14 | public class hdfsGYT { 15 | 16 | private static final String HDFS = "hdfs://127.0.0.1:9000/"; 17 | 18 | public hdfsGYT(String hdfs, Configuration conf ){ 19 | this.hdfsPath = hdfs; 20 | this.conf = conf; 21 | } 22 | 23 | public hdfsGYT() { 24 | // TODO Auto-generated constructor stub 25 | } 26 | 27 | private String hdfsPath; 28 | private Configuration conf = new Configuration() ; 29 | 30 | public static void main(String[] args) throws IOException, URISyntaxException{ 31 | hdfsGYT hdfsgyt = new hdfsGYT(); 32 | String folder = HDFS + "mr/groom_system/small2.csv"; 33 | String local = "/home/thinkgamer/Java/hadoop_shizhan/src/user_thing_tuijian/small2.csv"; 34 | String local1 = "/home/thinkgamer/Java/hadoop_shizhan/src/user_thing_tuijian"; 35 | //判断某个文件夹是否存在 36 | //hdfsgyt.isExist(folder); 37 | //创建文件夹 38 | //hdfsgyt.mkdir(folder); 39 | //删除文件夹 40 | //hdfsgyt.rmr(folder); 41 | //列出所有文件夹 42 | //hdfsgyt.ls(folder); 43 | //递归列出所有文件夹 44 | //hdfsgyt.lsr(folder); 45 | //上传文件 46 | //hdfsgyt.put(local, folder); 47 | //下载文件 48 | //hdfsgyt.get(folder,local1); 49 | //删除文件 50 | //hdfsgyt.rm(folder); 51 | //显示文件 52 | //hdfsgyt.cat(folder); 53 | //重命名文件 54 | // String path1 = HDFS + "mr/output"; 55 | // String path2 = HDFS + "mr/input"; 56 | // hdfsgyt.rename(path1,path2); 57 | } 58 | 59 | //重命名文件 60 | public void rename(String path1, String path2) throws IOException, URISyntaxException { 61 | // TODO Auto-generated method stub 62 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 63 | try{ 64 | fs.rename(new Path(path1), new Path(path2 ) ); 65 | System.out.println("Rename " + path1 + " To " + path2 ); 66 | }finally{ 67 | fs.close(); 68 | } 69 | } 70 | 71 | //显示文件 72 | public static void cat(String folder) throws IOException, URISyntaxException { 73 | // 与hdfs建立联系 74 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 75 | Path path = new Path(folder); 76 | FSDataInputStream fsdis = null; 77 | System.out.println("cat: " + folder); 78 | try { 79 | fsdis =fs.open(path); 80 | IOUtils.copyBytes(fsdis, System.out, 4096, false); 81 | } finally { 82 | IOUtils.closeStream(fsdis); 83 | fs.close(); 84 | } 85 | } 86 | 87 | //删除文件 88 | public static void rm(String folder) throws IOException, URISyntaxException { 89 | //与hdfs建立联系 90 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 91 | Path path = new Path(folder); 92 | if(fs.deleteOnExit(path)){ 93 | fs.delete(path); 94 | System.out.println("delete:" + folder); 95 | }else{ 96 | System.out.println("The fiel is not exist!"); 97 | } 98 | fs.close(); 99 | } 100 | 101 | //下载文件 102 | public static void get(String remote, String local) throws IllegalArgumentException, IOException, URISyntaxException { 103 | // 建立联系 104 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 105 | fs.copyToLocalFile(new Path(remote), new Path(local)); 106 | System.out.println("Get From : " + remote + " To :" + local); 107 | 
fs.close(); 108 | } 109 | 110 | //上传文件 111 | public static void put(String local, String remote) throws IOException, URISyntaxException { 112 | // 建立联系 113 | FileSystem fs = FileSystem.get(new URI(HDFS), new Configuration()); 114 | fs.copyFromLocalFile(new Path(local), new Path(remote)); 115 | System.out.println("Put :" + local + " To : " + remote); 116 | fs.close(); 117 | } 118 | 119 | //递归列出所有文件夹 120 | public static void lsr(String folder) throws IOException, URISyntaxException { 121 | //与hdfs建立联系 122 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 123 | Path path = new Path(folder); 124 | //得到该目录下的所有文件 125 | FileStatus[] fileList = fs.listStatus(path); 126 | for (FileStatus f : fileList) { 127 | System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen()); 128 | try{ 129 | FileStatus[] fileListR = fs.listStatus(f.getPath()); 130 | for(FileStatus fr:fileListR){ 131 | System.out.printf("name: %s | folder: %s | size: %d\n", fr.getPath(), fr.isDir() , fr.getLen()); 132 | } 133 | }finally{ 134 | continue; 135 | } 136 | } 137 | fs.close(); 138 | } 139 | 140 | //列出所有文件夹 141 | public static void ls(String folder) throws IOException, URISyntaxException { 142 | //与hdfs建立联系 143 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 144 | Path path = new Path(folder); 145 | //得到该目录下的所有文件 146 | FileStatus[] fileList = fs.listStatus(path); 147 | for (FileStatus f : fileList) { 148 | System.out.printf("name: %s | folder: %s | size: %d\n", f.getPath(), f.isDir() , f.getLen()); 149 | } 150 | fs.close(); 151 | } 152 | 153 | //删除文件夹 154 | public static void rmr(String folder) throws IOException, URISyntaxException { 155 | //与hdfs建立联系 156 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 157 | Path path = new Path(folder); 158 | fs.delete(path); 159 | System.out.println("delete:" + folder); 160 | fs.close(); 161 | } 162 | 163 | //创建文件夹 164 | public static void mkdir(String folder) throws IOException, URISyntaxException { 165 | //与hdfs建立联系 166 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 167 | Path path = new Path(folder); 168 | if (!fs.exists(path)) { 169 | fs.mkdirs(path); 170 | System.out.println("Create: " + folder); 171 | }else{ 172 | System.out.println("it is have exist:" + folder); 173 | } 174 | fs.close(); 175 | } 176 | 177 | //判断某个文件夹是否存在 178 | public static void isExist(String folder) throws IOException, URISyntaxException { 179 | //与hdfs建立联系 180 | FileSystem fs = FileSystem.get(new URI(HDFS),new Configuration()); 181 | Path path = new Path(folder); 182 | if(fs.exists(path)){ 183 | System.out.println("it is have exist:" + folder); 184 | }else{ 185 | System.out.println("it is not exist:" + folder); 186 | } 187 | fs.close(); 188 | } 189 | 190 | } -------------------------------------------------------------------------------- /Hadoop/pagerankjisuan/prJisuan.java: -------------------------------------------------------------------------------- 1 | package pagerankjisuan; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.HashMap; 6 | import java.util.Iterator; 7 | import java.util.Map; 8 | 9 | import org.apache.hadoop.fs.Path; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.io.Text; 12 | import org.apache.hadoop.mapreduce.Job; 13 | import org.apache.hadoop.mapreduce.Mapper; 14 | import org.apache.hadoop.mapreduce.Reducer; 15 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 16 | import 
org.apache.hadoop.mapreduce.lib.input.FileSplit; 17 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 18 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 20 | public class prJisuan { 21 | 22 | public static class prJisuanMapper extends Mapper { 23 | 24 | private String flag; //tmp1 or result 25 | private static int nums = 100; //页面数 26 | private static Text k =new Text(); 27 | private static Text v =new Text(); 28 | 29 | protected void setup(Context context){ 30 | FileSplit split = (FileSplit) context.getInputSplit(); 31 | flag = split.getPath().getParent().getName(); //判断读的数据集 32 | } 33 | 34 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{ 35 | 36 | if(flag.endsWith("tmp1")){ 37 | String[] tokens = value.toString().split("\t"); 38 | String row =tokens[0]; 39 | String[] vals = tokens[1].split(",");//转置矩阵 40 | for (int i =0;i{ 58 | 59 | public void reduce(Text key, Iterable values,Context context) throws IOException, InterruptedException{ 60 | Map mapA = new HashMap(); 61 | Map mapB = new HashMap(); 62 | float pr = 0f; 63 | for (Text val : values) { 64 | // System.out.println(val.toString()); 65 | String value = val.toString(); 66 | if(value.startsWith("A") ){ 67 | String[] tokenA = value.split(":")[1].split(","); 68 | mapA.put(Integer.parseInt(tokenA[0]), Float.parseFloat(tokenA[1]) ); 69 | } 70 | 71 | if(value.startsWith("B")){ 72 | String[] tokenB = value.split(":")[1].split(","); 73 | mapB.put(Integer.parseInt(tokenB[0]), Float.parseFloat(tokenB[1]) ); 74 | } 75 | } 76 | 77 | Iterator iterA = mapA.keySet().iterator(); 78 | while(iterA.hasNext()){ 79 | int idx = Integer.parseInt( iterA.next().toString() ); 80 | float A = mapA.get(idx); 81 | float B = mapB.get(idx); 82 | pr += A * B; 83 | // System.out.println(idx + " " + A + " " + B); 84 | } 85 | context.write(key,new Text(prjob.scaleFloat(pr))); 86 | } 87 | } 88 | 89 | public static void main(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 90 | 91 | String input = path.get("tmp1"); 92 | String output = path.get("tmp2"); 93 | String pr = path.get("input_pr"); 94 | 95 | hdfsGYT hdfs = new hdfsGYT(); 96 | hdfs.rmr(output); 97 | 98 | Job job = new Job(); 99 | job.setJarByClass(prJisuan.class); 100 | 101 | //set file input 102 | FileInputFormat.setInputPaths(job, new Path(input), new Path(pr)); 103 | job.setInputFormatClass(TextInputFormat.class); 104 | 105 | //set map 106 | job.setMapperClass(prJisuanMapper.class); 107 | job.setMapOutputKeyClass(Text.class); 108 | job.setMapOutputValueClass(Text.class); 109 | 110 | //set partition 111 | //set combine 112 | //set sort 113 | 114 | //set reduce 115 | job.setReducerClass(prJisuanReducer.class); 116 | job.setOutputKeyClass(Text.class); 117 | job.setOutputValueClass(Text.class); 118 | 119 | //set outputpath 120 | FileOutputFormat.setOutputPath(job, new Path(output)); 121 | job.setOutputFormatClass(TextOutputFormat.class); 122 | 123 | //upload job 124 | job.waitForCompletion(true); 125 | 126 | hdfs.rmr(pr); 127 | hdfs.rename(output, pr); 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /Hadoop/pagerankjisuan/prMatrix.java: -------------------------------------------------------------------------------- 1 | package pagerankjisuan; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.Arrays; 6 | 
import java.util.Map; 7 | 8 | import org.apache.hadoop.fs.Path; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.io.Text; 11 | import org.apache.hadoop.mapreduce.Job; 12 | import org.apache.hadoop.mapreduce.Mapper; 13 | import org.apache.hadoop.mapreduce.Reducer; 14 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 15 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 16 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 17 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 18 | 19 | public class prMatrix { 20 | 21 | private static int nums = 100; //页面数 22 | private static float d = 0.85f; //阻尼系数 23 | 24 | private static class MatrixMapper extends Mapper{ 25 | private static final Text k = new Text(); 26 | private static final Text v = new Text(); 27 | public void map(LongWritable key,Text value, Context context) throws IOException, InterruptedException{ 28 | // System.out.println(value.toString()); 29 | String[] tokens = value.toString().split(","); 30 | k.set(tokens[0]); 31 | v.set(tokens[1]); 32 | context.write(k, v); 33 | } 34 | } 35 | 36 | 37 | public static class MatrixReducer extends Reducer{ 38 | 39 | public void reduce(Text key, Iterablevalues, Context context ) throws IOException, InterruptedException{ 40 | float[] G = new float[nums]; //概率矩阵列 41 | Arrays.fill(G, (float)(1-d) / G.length ); //填充矩阵列 42 | 43 | float[] A = new float[nums] ; //近 邻矩阵列 44 | int sum=0; //链出数量 45 | for(Text text :values){ 46 | int idx = Integer.parseInt(text.toString()); 47 | // System.out.println(idx + "idx -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="); 48 | A[idx-1 ] = 1; 49 | sum ++; 50 | } 51 | 52 | if(sum==0){ //分母不能为0 53 | sum=1; 54 | } 55 | 56 | StringBuilder sb = new StringBuilder(); 57 | for(int i=0;i path) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException { 68 | 69 | String input = path.get("input"); 70 | String input_pr = path.get("input_pr"); 71 | String output = path.get("tmp1"); 72 | 73 | String page = path.get("page"); 74 | String pr = path.get("pr"); 75 | 76 | hdfsGYT hdfs = new hdfsGYT(); 77 | //创建需要的文件夹 78 | hdfs.rmr(input); 79 | hdfs.rmr(output); 80 | hdfs.mkdir(input); 81 | hdfs.mkdir(input_pr); 82 | //上传文件到指定的目录 内 83 | hdfs.put(page, input); 84 | hdfs.put(pr, input_pr); 85 | 86 | Job job = new Job(); 87 | job.setJarByClass(prMatrix.class); 88 | 89 | job.setInputFormatClass(TextInputFormat.class); 90 | job.setOutputFormatClass(TextOutputFormat.class); 91 | 92 | FileInputFormat.addInputPath(job, new Path(input)); 93 | FileOutputFormat.setOutputPath(job, new Path(output)); 94 | 95 | job.setMapperClass(MatrixMapper.class); 96 | job.setReducerClass(MatrixReducer.class); 97 | 98 | job.setMapOutputKeyClass(Text.class); 99 | job.setMapOutputValueClass(Text.class); 100 | 101 | job.setOutputKeyClass(Text.class); 102 | job.setOutputValueClass(Text.class); 103 | 104 | job.waitForCompletion(true); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /Hadoop/pagerankjisuan/prNormal.java: -------------------------------------------------------------------------------- 1 | package pagerankjisuan; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import org.apache.hadoop.fs.Path; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.io.Text; 12 | import org.apache.hadoop.mapreduce.Job; 13 
| import org.apache.hadoop.mapreduce.Mapper; 14 | import org.apache.hadoop.mapreduce.Reducer; 15 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 16 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 18 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 19 | 20 | public class prNormal { 21 | 22 | public static class normalMapper extends Mapper{ 23 | private static Text k = new Text("1"); 24 | public void map(LongWritable key , Text value , Context context) throws IOException, InterruptedException{ 25 | // System.out.println(value.toString()); 26 | context.write(k,value); 27 | } 28 | } 29 | 30 | public static class normalReducer extends Reducer{ 31 | 32 | public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException{ 33 | List list = new ArrayList(); 34 | 35 | float sum = 0f; 36 | for(Text text : values){ 37 | list.add(text.toString()); 38 | 39 | String[] val = text.toString().split("\t"); 40 | float f = Float.parseFloat(val[1]); 41 | sum +=f; 42 | } 43 | 44 | for(String line : list){ 45 | String[] vals = line.split("\t"); 46 | Text k = new Text(vals[0]); 47 | 48 | float f = Float.parseFloat(vals[1]); 49 | Text v = new Text(prjob.scaleFloat ( (float) (f / sum) )); 50 | context.write(k, v); 51 | 52 | // System.out.println(k + ":" + v); 53 | } 54 | } 55 | } 56 | 57 | public static void main(Map path) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException, URISyntaxException { 58 | String input = path.get("input_pr"); 59 | String output = path.get("result"); 60 | 61 | hdfsGYT hdfs = new hdfsGYT(); 62 | hdfs.rmr(output); 63 | 64 | Job job = new Job(); 65 | job.setJarByClass(prNormal.class); 66 | 67 | //set file input 68 | FileInputFormat.addInputPath(job, new Path(input)); 69 | job.setInputFormatClass(TextInputFormat.class); 70 | 71 | //set map 72 | job.setMapperClass(normalMapper.class); 73 | job.setMapOutputKeyClass(Text.class); 74 | job.setMapOutputValueClass(Text.class); 75 | 76 | //set partition 77 | //set combine 78 | //set sort 79 | 80 | //set reduce 81 | job.setReducerClass(normalReducer.class); 82 | job.setOutputKeyClass(Text.class); 83 | job.setOutputValueClass(Text.class); 84 | 85 | //set outputpath 86 | FileOutputFormat.setOutputPath(job, new Path(output)); 87 | job.setOutputFormatClass(TextOutputFormat.class); 88 | 89 | //upload job 90 | job.waitForCompletion(true); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /Hadoop/pagerankjisuan/prSort.java: -------------------------------------------------------------------------------- 1 | package pagerankjisuan; 2 | 3 | import java.io.IOException; 4 | import java.net.URISyntaxException; 5 | import java.util.Map; 6 | 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.FloatWritable; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.IntWritable.Comparator; 11 | import org.apache.hadoop.io.Text; 12 | import org.apache.hadoop.io.WritableComparable; 13 | import org.apache.hadoop.mapreduce.Job; 14 | import org.apache.hadoop.mapreduce.Mapper; 15 | import org.apache.hadoop.mapreduce.Reducer; 16 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 17 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 18 | 19 | public class prSort { 20 | /** 21 | * @param args 22 | * @throws IOException 23 | * @throws IllegalArgumentException 24 | * @throws 
InterruptedException 25 | * @throws ClassNotFoundException 26 | */ 27 | public static class myComparator extends Comparator { 28 | @SuppressWarnings("rawtypes") 29 | public int compare( WritableComparable a,WritableComparable b){ 30 | return -super.compare(a, b); 31 | } 32 | public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { 33 | return -super.compare(b1, s1, l1, b2, s2, l2); 34 | } 35 | } 36 | 37 | public static class sortMap extends Mapper{ 38 | public void map(Object key,Text value,Context context) throws NumberFormatException, IOException, InterruptedException{ 39 | String[] split = value.toString().split("\t"); 40 | context.write(new FloatWritable(Float.parseFloat(split[1])),new IntWritable(Integer.parseInt(split[0])) ); 41 | } 42 | } 43 | public static class Reduce extends Reducer{ 44 | public void reduce(FloatWritable key,Iterablevalues,Context context) throws IOException, InterruptedException{ 45 | for (IntWritable text : values) { 46 | context.write( text,key); 47 | } 48 | } 49 | } 50 | 51 | public static void main(Map path) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { 52 | // TODO Auto-generated method stub 53 | 54 | String input = path.get("result"); 55 | String output = path.get("sort"); 56 | hdfsGYT hdfs = new hdfsGYT(); 57 | hdfs.rmr(output); 58 | 59 | Job job = new Job(); 60 | job.setJarByClass(prSort.class); 61 | // 1 62 | FileInputFormat.setInputPaths(job, new Path(input) ); 63 | // 2 64 | job.setMapperClass(sortMap.class); 65 | job.setMapOutputKeyClass(FloatWritable.class); 66 | job.setMapOutputValueClass(IntWritable.class); 67 | // 3 68 | // 4 自定义排序 69 | job.setSortComparatorClass( myComparator.class); 70 | // 5 71 | job.setNumReduceTasks(1); 72 | // 6 73 | job.setReducerClass(Reduce.class); 74 | job.setOutputKeyClass(IntWritable.class); 75 | job.setOutputValueClass(FloatWritable.class); 76 | // 7 77 | FileOutputFormat.setOutputPath(job, new Path(output)); 78 | // 8 79 | System.exit(job.waitForCompletion(true)? 
0 :1 ); 80 | } 81 | } 82 | 83 | -------------------------------------------------------------------------------- /Hadoop/pagerankjisuan/prjob.java: -------------------------------------------------------------------------------- 1 | package pagerankjisuan; 2 | 3 | import java.text.DecimalFormat; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | /* 8 | * 调度函数 9 | */ 10 | public class prjob { 11 | 12 | public static final String HDFS = "hdfs://127.0.0.1:9000"; 13 | 14 | public static void main(String[] args) { 15 | Map path= new HashMap(); 16 | 17 | path.put("page" ,"/home/thinkgamer/MyCode/hadoop/MyItems/pagerankjisuan/people.csv"); 18 | path.put("pr" ,"/home/thinkgamer/MyCode/hadoop/MyItems/pagerankjisuan/peoplerank.txt"); 19 | 20 | path.put("input", HDFS + "/mr/blog_analysic_system/people"); // HDFS的目录 21 | path.put("input_pr", HDFS + "/mr/blog_analysic_system/pr"); // pr存储目录 22 | path.put("tmp1", HDFS + "/mr/blog_analysic_system/tmp1"); // 临时目录,存放邻接矩阵 23 | path.put("tmp2", HDFS + "/mr/blog_analysic_system/tmp2"); // 临时目录,计算到得PR,覆盖input_pr 24 | 25 | path.put("result", HDFS + "/mr/blog_analysic_system/result"); // 计算结果的PR 26 | 27 | path.put("sort", HDFS + "/mr/blog_analysic_system/sort"); //最终排序输出的结果 28 | 29 | try { 30 | dataEtl.main(); 31 | prMatrix.main(path); 32 | int iter = 3; // 迭代次数 33 | for (int i = 0; i < iter; i++) { 34 | prJisuan.main(path); 35 | } 36 | prNormal.main(path); 37 | prSort.main(path); 38 | 39 | } catch (Exception e) { 40 | e.printStackTrace(); 41 | } 42 | System.exit(0); 43 | } 44 | 45 | public static String scaleFloat(float f) {// 保留6位小数 46 | DecimalFormat df = new DecimalFormat("##0.000000"); 47 | return df.format(f); 48 | } 49 | } -------------------------------------------------------------------------------- /Hadoop/selfSort/input: -------------------------------------------------------------------------------- 1 | 2013 1 2 | 2013 5 3 | 2014 5 4 | 2014 8 5 | 2015 9 6 | 2015 4 7 | -------------------------------------------------------------------------------- /Hadoop/selfSort/output: -------------------------------------------------------------------------------- 1 | 2015 4 2 | 2015 9 3 | 2014 5 4 | 2014 8 5 | 2013 1 6 | 2013 5 7 | -------------------------------------------------------------------------------- /Hadoop/selfSort/selfSort.java: -------------------------------------------------------------------------------- 1 | package selfSort; 2 | 3 | /* 4 | * 第一列降序,第一列相同时第二列升序 5 | */ 6 | 7 | import java.io.DataInput; 8 | import java.io.DataOutput; 9 | import java.io.IOException; 10 | 11 | import org.apache.hadoop.fs.Path; 12 | import org.apache.hadoop.io.LongWritable; 13 | import org.apache.hadoop.io.Text; 14 | import org.apache.hadoop.io.WritableComparable; 15 | import org.apache.hadoop.mapreduce.Job; 16 | import org.apache.hadoop.mapreduce.Mapper; 17 | import org.apache.hadoop.mapreduce.Reducer; 18 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 19 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 20 | 21 | public class selfSort { 22 | 23 | public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 24 | // TODO Auto-generated method stub 25 | 26 | Job job = new Job(); 27 | job.setJarByClass(selfSort.class); 28 | // 1 29 | FileInputFormat.setInputPaths(job, new Path(args[0])); 30 | // 2 31 | job.setMapperClass(Map.class); 32 | job.setMapOutputKeyClass(MyK2.class); 33 | job.setMapOutputValueClass(LongWritable.class); 34 | // 3 35 | // 4 36 | // 5 37 | 
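// (Steps 3 and 4 above are intentionally left empty: no custom partitioner or sort comparator
// is set, because the composite key MyK2 defined below already orders records through its
// compareTo() - first column descending, second column ascending.)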
job.setNumReduceTasks(1); 38 | // 6 39 | job.setReducerClass(Reduce.class); 40 | job.setOutputKeyClass(LongWritable.class); 41 | job.setOutputValueClass(LongWritable.class); 42 | // 7 43 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 44 | // 8 45 | System.exit(job.waitForCompletion(true)? 0 :1 ); 46 | } 47 | public static class Map extends Mapper{ 48 | public void map(Object key, Text value, Context context) throws IOException, InterruptedException{ 49 | String line = value.toString(); 50 | String[] split = line.split("\t"); 51 | MyK2 my = new MyK2(Long.parseLong(split[0]), Long.parseLong(split[1])); 52 | context.write(my, new LongWritable(1)); 53 | } 54 | } 55 | public static class Reduce extends Reducer{ 56 | public void reduce(MyK2 key, Iterable values, Context context) throws IOException, InterruptedException{ 57 | context.write(new LongWritable(key.myk2), new LongWritable(key.myv2)); 58 | } 59 | } 60 | 61 | public static class MyK2 implements WritableComparable{ 62 | 63 | public long myk2; 64 | public long myv2; 65 | 66 | MyK2(){} 67 | 68 | MyK2(long myk2, long myv2){ 69 | this.myk2 = myk2; 70 | this.myv2 = myv2; 71 | } 72 | 73 | @Override 74 | public void readFields(DataInput in) throws IOException { 75 | // TODO Auto-generated method stub 76 | this.myk2 = in.readLong(); 77 | this.myv2 = in.readLong(); 78 | } 79 | 80 | @Override 81 | public void write(DataOutput out) throws IOException { 82 | // TODO Auto-generated method stub 83 | out.writeLong(myk2); 84 | out.writeLong(myv2); 85 | } 86 | 87 | @Override 88 | public int compareTo(MyK2 myk2) { 89 | // TODO Auto-generated method stub 90 | //myk2之差>0 返回-1 <0 返回1 代表 myk2列降序 91 | //myk2之差<0 返回-1 >0 返回1 代表 myk2列升序 92 | long temp = this.myk2 - myk2.myk2; 93 | if(temp>0) 94 | return -1; 95 | else if(temp<0) 96 | return 1; 97 | //控制myv2升序 98 | return (int)(this.myv2 - myk2.myv2); 99 | } 100 | } 101 | } -------------------------------------------------------------------------------- /Hadoop/sort_twice/Intpair.java: -------------------------------------------------------------------------------- 1 | package sort_twice; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import java.io.IOException; 6 | 7 | import org.apache.hadoop.io.WritableComparable; 8 | 9 | public class Intpair implements WritableComparable{ 10 | int first; 11 | int second; 12 | 13 | public void set(int first,int second){ 14 | this.first = first; 15 | this.second = second; 16 | } 17 | 18 | public int getFirst() { 19 | // TODO Auto-generated method stub 20 | return first; 21 | } 22 | 23 | public int getSecond() { 24 | // TODO Auto-generated method stub 25 | return second; 26 | } 27 | 28 | //序列化,从流中读进二进制转换成InPair 29 | @Override 30 | public void readFields(DataInput in) throws IOException { 31 | // TODO Auto-generated method stub 32 | first = in.readInt(); 33 | second = in.readInt(); 34 | } 35 | 36 | //范序列化,将Intpair转换成二进制输出 37 | @Override 38 | public void write(DataOutput out) throws IOException { 39 | // TODO Auto-generated method stub 40 | out.writeInt(first); 41 | out.writeInt(second); 42 | } 43 | 44 | //先按照first比较再按照second比较 45 | @Override 46 | public int compareTo(Intpair o) { 47 | // TODO Auto-generated method stub 48 | if(first != o.first){ 49 | return first < o.first?-1:1; 50 | }else if(second !=o.second){ 51 | return second < o.second?-1:1; 52 | }else{ 53 | return 0; 54 | } 55 | } 56 | 57 | @Override 58 | //The hashCode() method is used by the HashPartitioner (the default partitioner in MapReduce) 59 | public int hashCode() 60 | { 61 | 
return first+"".hashCode() + second+"".hashCode(); 62 | } 63 | 64 | @Override 65 | public boolean equals(Object right) 66 | { 67 | if (right instanceof Intpair) { 68 | Intpair r = (Intpair) right; 69 | return r.first == first && r.second == second; 70 | } else { 71 | return false; 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /Hadoop/sort_twice/groupingComparator.java: -------------------------------------------------------------------------------- 1 | package sort_twice; 2 | 3 | import org.apache.hadoop.io.RawComparator; 4 | import org.apache.hadoop.io.WritableComparator; 5 | 6 | public class groupingComparator implements RawComparator { 7 | @Override 8 | public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { 9 | return WritableComparator.compareBytes(b1, s1, Integer.SIZE/8, b2, s2, Integer.SIZE/8); 10 | } 11 | @Override 12 | public int compare(Intpair o1, Intpair o2) { 13 | // TODO Auto-generated method stub 14 | int first1 = o1.getFirst(); 15 | int first2 = o2.getFirst(); 16 | return first1 - first2; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /Hadoop/sort_twice/input: -------------------------------------------------------------------------------- 1 | 20 21 2 | 50 51 3 | 50 52 4 | 50 53 5 | 50 54 6 | 60 51 7 | 60 53 8 | 60 52 9 | 60 56 10 | 60 57 11 | 70 58 12 | 60 61 13 | 70 54 14 | 70 55 15 | 70 56 16 | 70 57 17 | 70 58 18 | 1 2 19 | 3 4 20 | 5 6 21 | 7 82 22 | 203 21 23 | 50 512 24 | 50 522 25 | 50 53 26 | 530 54 27 | 40 511 28 | 20 53 29 | 20 522 30 | 60 56 31 | 60 57 32 | 740 58 33 | 63 61 34 | 730 54 35 | 71 55 36 | 71 56 37 | 73 57 38 | 74 58 39 | 12 211 40 | 31 42 41 | 50 62 42 | 7 8 43 | -------------------------------------------------------------------------------- /Hadoop/sort_twice/myPartition.java: -------------------------------------------------------------------------------- 1 | package sort_twice; 2 | 3 | import org.apache.hadoop.io.IntWritable; 4 | import org.apache.hadoop.mapreduce.Partitioner; 5 | 6 | public class myPartition extends Partitioner { 7 | 8 | @Override 9 | public int getPartition(Intpair key, IntWritable value, int numOfReducer) { 10 | // TODO Auto-generated method stub 11 | 12 | return Math.abs(key.getFirst() * 127) % numOfReducer ; 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /Hadoop/sort_twice/output: -------------------------------------------------------------------------------- 1 | ------------^^我们是同一个分组的^^----------- 2 | 1 2 3 | ------------^^我们是同一个分组的^^----------- 4 | 3 4 5 | ------------^^我们是同一个分组的^^----------- 6 | 5 6 7 | ------------^^我们是同一个分组的^^----------- 8 | 7 8 9 | 7 82 10 | ------------^^我们是同一个分组的^^----------- 11 | 12 211 12 | ------------^^我们是同一个分组的^^----------- 13 | 20 21 14 | 20 53 15 | 20 522 16 | ------------^^我们是同一个分组的^^----------- 17 | 31 42 18 | ------------^^我们是同一个分组的^^----------- 19 | 40 511 20 | ------------^^我们是同一个分组的^^----------- 21 | 50 51 22 | 50 52 23 | 50 53 24 | 50 53 25 | 50 54 26 | 50 62 27 | 50 512 28 | 50 522 29 | ------------^^我们是同一个分组的^^----------- 30 | 60 51 31 | 60 52 32 | 60 53 33 | 60 56 34 | 60 56 35 | 60 57 36 | 60 57 37 | 60 61 38 | ------------^^我们是同一个分组的^^----------- 39 | 63 61 40 | ------------^^我们是同一个分组的^^----------- 41 | 70 54 42 | 70 55 43 | 70 56 44 | 70 57 45 | 70 58 46 | 70 58 47 | ------------^^我们是同一个分组的^^----------- 48 | 71 55 49 | 71 56 50 | ------------^^我们是同一个分组的^^----------- 51 | 73 57 52 | 
------------^^我们是同一个分组的^^----------- 53 | 74 58 54 | ------------^^我们是同一个分组的^^----------- 55 | 203 21 56 | ------------^^我们是同一个分组的^^----------- 57 | 530 54 58 | ------------^^我们是同一个分组的^^----------- 59 | 730 54 60 | ------------^^我们是同一个分组的^^----------- 61 | 740 58 62 | -------------------------------------------------------------------------------- /Hadoop/sort_twice/sort_twice.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinkgamer/Hadoop-Spark-Learning/46f24ae930fc6d426ad14989cb6dbad1e7966d8e/Hadoop/sort_twice/sort_twice.jar -------------------------------------------------------------------------------- /Hadoop/sort_twice/sort_twice.java: -------------------------------------------------------------------------------- 1 | package sort_twice; 2 | 3 | import java.io.IOException; 4 | import java.util.Date; 5 | import java.util.StringTokenizer; 6 | 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.IntWritable; 9 | import org.apache.hadoop.io.Text; 10 | import org.apache.hadoop.mapreduce.Job; 11 | import org.apache.hadoop.mapreduce.Mapper; 12 | import org.apache.hadoop.mapreduce.Reducer; 13 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 14 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 15 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 16 | 17 | public class sort_twice { 18 | 19 | /** 20 | * @param args 21 | * @throws IOException 22 | * @throws InterruptedException 23 | * @throws ClassNotFoundException 24 | */ 25 | public static class Map extends Mapper{ 26 | private final Intpair intkey = new Intpair(); 27 | private final IntWritable intvalue = new IntWritable(); 28 | 29 | public void map(Object key, Text value,Context context) throws IOException, InterruptedException{ 30 | StringTokenizer token = new StringTokenizer(value.toString()); 31 | int left = 0; 32 | int right = 0; 33 | while (token.hasMoreElements()){ 34 | left = Integer.parseInt( token.nextToken()); 35 | if(token.hasMoreTokens()) 36 | right = Integer.parseInt(token.nextToken()); 37 | intkey.set(left,right); 38 | intvalue.set(right); 39 | context.write(intkey, intvalue); 40 | } 41 | } 42 | 43 | } 44 | 45 | public static class Reduce extends Reducer{ 46 | private final Text left = new Text(); 47 | private final Text SEPAPATOR= new Text("------------^^我们是同一个分组的^^-----------"); 48 | public void reduce(Intpair key,Iterable values,Context context) throws IOException, InterruptedException{ 49 | left.set(Integer.toString(key.getFirst())); 50 | context.write(SEPAPATOR, null); 51 | for(IntWritable val:values){ 52 | context.write(left, val); 53 | } 54 | } 55 | } 56 | 57 | public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 58 | // TODO Auto-generated method stub 59 | Job job = new Job(); 60 | job.setJarByClass(sort_twice.class); 61 | 62 | //1 指定输入文件路径 63 | FileInputFormat.addInputPath(job, new Path(args[0])); 64 | job.setInputFormatClass(TextInputFormat.class); 65 | 66 | //2 设置Map相关 67 | job.setMapperClass(Map.class); 68 | job.setMapOutputKeyClass(Intpair.class); 69 | job.setMapOutputValueClass(IntWritable.class); 70 | 71 | //3 设置分区和reducer数目 72 | job.setPartitionerClass(myPartition.class); 73 | 74 | //4 重写分组函数 75 | job.setGroupingComparatorClass(groupingComparator.class); 76 | 77 | //5 归约处理 78 | //6 指定reducer类 79 | job.setReducerClass(Reduce.class); 80 | job.setOutputKeyClass(Text.class); 81 | job.setOutputValueClass(IntWritable.class); 82 | 
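		// The number of reduce tasks is never set in this job; with Hadoop's default of a
		// single reducer, myPartition maps every key to partition 0, so the custom
		// partitioner only takes effect if job.setNumReduceTasks(n) is added with n > 1.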
83 | //7设置输出路径 84 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 85 | 86 | //8 提交任务 87 | int result = job.waitForCompletion(true)? 0 : 1; //任务开始 88 | 89 | 90 | //输出任务相关的信息 91 | Date start = new Date(); 92 | Date end = new Date(); 93 | float time = (float)(end.getTime()-start.getTime()); 94 | 95 | System.out.println("Job ID:"+job.getJobID()); 96 | System.out.println("Job Name:"+job.getJobName()); 97 | System.out.println("Job StartTime:"+start); 98 | System.out.println("Job EndTime:" + end); 99 | System.out.println("Job 经历的时间:" + time); 100 | System.out.println("Job 是否成功:"+job.isSuccessful()); 101 | System.out.println(result); 102 | } 103 | } -------------------------------------------------------------------------------- /Hadoop/wordcount/wordcount.java: -------------------------------------------------------------------------------- 1 | package wordcount; 2 | 3 | import java.io.IOException; 4 | import java.io.StringReader; 5 | 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.io.IntWritable; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Job; 10 | import org.apache.hadoop.mapreduce.Mapper; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 14 | import org.wltea.analyzer.core.IKSegmenter; 15 | import org.wltea.analyzer.core.Lexeme; 16 | 17 | public class wordcount { 18 | 19 | public static class Map extends Mapper{ 20 | private static final Text word = new Text(); 21 | public void map(Object key,Text value,Context context) throws IOException, InterruptedException{ 22 | String line = value.toString(); 23 | StringReader sr=new StringReader(line); 24 | IKSegmenter ik=new IKSegmenter(sr, true); 25 | Lexeme lex=null; 26 | while((lex=ik.next())!=null){ 27 | word.set(lex.getLexemeText()); 28 | System.out.println(lex.getLexemeText() + "\tddddddddddddddddd\t" + "1"); 29 | context.write(new Text(word),new IntWritable(1)); 30 | } 31 | } 32 | } 33 | 34 | public static class Reduce extends Reducer{ 35 | private static final IntWritable result = new IntWritable(); 36 | public void reduce(Text key,Iterablevalues,Context context) throws IOException, InterruptedException{ 37 | int num =0; 38 | for(IntWritable value:values){ 39 | num += value.get(); 40 | } 41 | result.set(num); 42 | System.out.println(key.toString() + "\t................." 
+ num); 43 | context.write(key, result); 44 | } 45 | } 46 | public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 47 | // TODO Auto-generated method stub 48 | Job job = new Job(); 49 | job.setJarByClass(wordcount.class); 50 | 51 | job.setNumReduceTasks(1); //设置reduce进程为1个,即output生成一个文件 52 | 53 | job.setMapperClass(Map.class); 54 | job.setReducerClass(Reduce.class); 55 | 56 | job.setOutputKeyClass(Text.class); //为job的输出数据设置key类 57 | job.setOutputValueClass(IntWritable.class); //为job的输出设置value类 58 | 59 | FileInputFormat.addInputPath(job, new Path(args[0])); //设置输入文件的目录 60 | FileOutputFormat.setOutputPath(job,new Path(args[1])); //设置输出文件的目录 61 | 62 | System.exit(job.waitForCompletion(true)?0:1); //提交任务 63 | } 64 | } -------------------------------------------------------------------------------- /Hadoop/二次排序/blogURL.txt: -------------------------------------------------------------------------------- 1 | http://blog.csdn.net/gamer_gyt/article/details/47315405 -------------------------------------------------------------------------------- /Hadoop/二次排序/part-r-00000: -------------------------------------------------------------------------------- 1 | ================================ 2 | 1 2 3 | ================================ 4 | 3 4 5 | ================================ 6 | 5 6 7 | ================================ 8 | 7 8 9 | 7 82 10 | ================================ 11 | 12 211 12 | ================================ 13 | 20 21 14 | 20 53 15 | 20 522 16 | ================================ 17 | 31 42 18 | ================================ 19 | 40 511 20 | ================================ 21 | 50 51 22 | 50 52 23 | 50 53 24 | 50 53 25 | 50 54 26 | 50 62 27 | 50 512 28 | 50 522 29 | ================================ 30 | 60 51 31 | 60 52 32 | 60 53 33 | 60 56 34 | 60 56 35 | 60 57 36 | 60 57 37 | 60 61 38 | ================================ 39 | 63 61 40 | ================================ 41 | 70 54 42 | 70 55 43 | 70 56 44 | 70 57 45 | 70 58 46 | 70 58 47 | ================================ 48 | 71 55 49 | 71 56 50 | ================================ 51 | 73 57 52 | ================================ 53 | 74 58 54 | ================================ 55 | 203 21 56 | ================================ 57 | 530 54 58 | ================================ 59 | 730 54 60 | ================================ 61 | 740 58 62 | -------------------------------------------------------------------------------- /Hadoop/二次排序/sortTwice.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinkgamer/Hadoop-Spark-Learning/46f24ae930fc6d426ad14989cb6dbad1e7966d8e/Hadoop/二次排序/sortTwice.jar -------------------------------------------------------------------------------- /Hadoop/二次排序/sortTwice.txt: -------------------------------------------------------------------------------- 1 | 20 21 2 | 50 51 3 | 50 52 4 | 50 53 5 | 50 54 6 | 60 51 7 | 60 53 8 | 60 52 9 | 60 56 10 | 60 57 11 | 70 58 12 | 60 61 13 | 70 54 14 | 70 55 15 | 70 56 16 | 70 57 17 | 70 58 18 | 1 2 19 | 3 4 20 | 5 6 21 | 7 82 22 | 203 21 23 | 50 512 24 | 50 522 25 | 50 53 26 | 530 54 27 | 40 511 28 | 20 53 29 | 20 522 30 | 60 56 31 | 60 57 32 | 740 58 33 | 63 61 34 | 730 54 35 | 71 55 36 | 71 56 37 | 73 57 38 | 74 58 39 | 12 211 40 | 31 42 41 | 50 62 42 | 7 8 43 | -------------------------------------------------------------------------------- /Hadoop/二次排序/sorttwice/IntPair.java: 
-------------------------------------------------------------------------------- 1 | package sortTwice; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import java.io.IOException; 6 | 7 | import org.apache.hadoop.io.WritableComparable; 8 | 9 | //自己定义的InPair类,实现WritableComparator 10 | public class IntPair implements WritableComparable{ 11 | int left; 12 | int right; 13 | 14 | public void set(int left, int right) { 15 | // TODO Auto-generated method stub 16 | this.left = left; 17 | this.right = right; 18 | } 19 | public int getLeft() { 20 | return left; 21 | } 22 | 23 | public int getRight() { 24 | return right; 25 | } 26 | 27 | //反序列化,从流中读进二进制转换成IntPair 28 | @Override 29 | public void readFields(DataInput in) throws IOException { 30 | // TODO Auto-generated method stub 31 | this.left = in.readInt(); 32 | this.right = in.readInt(); 33 | } 34 | //序列化,将IntPair转换成二进制输出 35 | @Override 36 | public void write(DataOutput out) throws IOException { 37 | // TODO Auto-generated method stub 38 | out.writeInt(left); 39 | out.writeInt(right); 40 | } 41 | 42 | /* 43 | * 为什么要重写equal方法? 44 | * 因为Object的equal方法默认是两个对象的引用的比较,意思就是指向同一内存,地址则相等,否则不相等; 45 | * 如果你现在需要利用对象里面的值来判断是否相等,则重载equal方法。 46 | */ 47 | @Override 48 | public boolean equals(Object obj) { 49 | // TODO Auto-generated method stub 50 | if(obj == null) 51 | return false; 52 | if(this == obj) 53 | return true; 54 | if (obj instanceof IntPair){ 55 | IntPair r = (IntPair) obj; 56 | return r.left == left && r.right==right; 57 | } 58 | else{ 59 | return false; 60 | } 61 | 62 | } 63 | 64 | /* 65 | * 重写equal 的同时为什么必须重写hashcode? 66 | * hashCode是编译器为不同对象产生的不同整数,根据equal方法的定义:如果两个对象是相等(equal)的,那么两个对象调用 hashCode必须产生相同的整数结果, 67 | * 即:equal为true,hashCode必须为true,equal为false,hashCode也必须 为false,所以必须重写hashCode来保证与equal同步。 68 | */ 69 | @Override 70 | public int hashCode() { 71 | // TODO Auto-generated method stub 72 | return left*157 +right; 73 | } 74 | 75 | //实现key的比较 76 | @Override 77 | public int compareTo(IntPair o) { 78 | // TODO Auto-generated method stub 79 | if(left != o.left) 80 | return left{ 28 | private final IntPair intkey = new IntPair(); 29 | private final IntWritable intvalue = new IntWritable(); 30 | public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{ 31 | StringTokenizer itr = new StringTokenizer(value.toString()); 32 | int left =0; 33 | int right =0; 34 | while(itr.hasMoreTokens()){ 35 | left = Integer.parseInt(itr.nextToken()); 36 | if(itr.hasMoreTokens()) 37 | right = Integer.parseInt(itr.nextToken()); 38 | intkey.set(left, right); 39 | intvalue.set(right); 40 | context.write(intkey, intvalue); 41 | 42 | } 43 | } 44 | } 45 | 46 | //Reducce类 47 | public static class ST_Reduce extends Reducer{ 48 | private final Text left = new Text(); 49 | private static final Text SEPAPATOR= new Text("================================"); 50 | public void reduce(IntPair key,Iterablevalues,Context context) throws IOException, InterruptedException{ 51 | context.write(SEPAPATOR, null); 52 | left.set(Integer.toString(key.getLeft())); 53 | for(IntWritable val:values){ 54 | context.write(left, val); 55 | } 56 | } 57 | } 58 | 59 | //分区函数类,根据first确定Partition 60 | public static class MyPartitioner extends Partitioner{ 61 | @Override 62 | public int getPartition(IntPair key, IntWritable value, int numOfReduce) { 63 | // TODO Auto-generated method stub 64 | return Math.abs(key.getLeft()*127) % numOfReduce; 65 | } 66 | } 67 | /** 68 | * 在分组比较的时候,只比较原来的key,而不是组合key。 69 | */ 70 | public static class 
MyGroupParator implements RawComparator{ 71 | 72 | @Override 73 | public int compare(IntPair o1 , IntPair o2) { 74 | // TODO Auto-generated method stub 75 | int l = o1.getLeft(); 76 | int r = o2.getRight(); 77 | return l == r ? 0:(l, 42 | stu_score MAP, 43 | stu_friend STRUCT) 44 | comment 'this is complex_student message table' 45 | row format delimited fields terminated by '\t' 46 | COLLECTION ITEMS TERMINATED BY ',' 47 | MAP KEYS TERMINATED BY ':'; 48 | #修改表名字 49 | alter table complex rename to complex_student; 50 | #加载数据 51 | load data local inpath "/home/thinkgamer/MyCode/hive/complex_student" into table complex_student; 52 | 53 | #截断表 :从表或者表分区删除所有行,不指定分区,将截断表中的所有分区,也可以一次指定多个分区,截断多个分区。 54 | truncate table complex_student; 55 | 56 | #查询示例 57 | select stu_mess[0],stu_score["chinese"],stu_friend.a from complex_student; 58 | 结果:thinkgamer 50 cyan 59 | 60 | 61 | 5:创建分区表partition_student 62 | create table partition_student( 63 | id int, 64 | name string, 65 | age int) 66 | comment 'this is student message table' 67 | Partitioned by (grade string,class string) 68 | row format delimited fields terminated by "\t"; 69 | #加载数据 70 | load data local inpath "/home/thinkgamer/MyCode/hive/partiton_student" into table partition_student partition (grade="2013", class="34010301"); 71 | load data local inpath "/home/thinkgamer/MyCode/hive/partiton_student2" into table partition_student partition (grade="2013", class="34010302"); 72 | 73 | 6:桶 74 | 创建临时表 75 | create table student_tmp( 76 | id int, 77 | name string, 78 | age int) 79 | comment 'this is student message table' 80 | row format delimited fields terminated by '\t'; 81 | 82 | 加载数据: 83 | load data local inpath '/home/thinkgamer/MyCode/hive/student.txt' into table student_tmp; 84 | 85 | 创建指定桶的个数的表student_bucket 86 | create table student_bucket(id int, 87 | name string, 88 | age int) 89 | clustered by(id) sorted by(age) into 2 buckets 90 | row format delimited fields terminated by '\t'; 91 | 92 | 设置环境变量: 93 | set hive.enforce.bucketing = true; 94 | 95 | 从student_tmp 装入数据 96 | from student_tmp 97 | insert overwrite table student_bucket 98 | select *; 99 | 100 | -------------------------------------------------------------------------------- /Hive/hiveTableExample/hiveQL~: -------------------------------------------------------------------------------- 1 | 1:创建内部表: 2 | create table student( 3 | id int, 4 | name string, 5 | age int) 6 | comment 'this is student message table' 7 | row format delimited fields terminated by '\t'; 8 | 9 | #从本地加载数据 10 | load data local inpath '/home/thinkgamer/MyCode/hive/student.txt' into table student; 11 | #从HDFS加载数据 12 | load data inpath '/home/thinkgamer/MyCode/hive/student.txt' into table student; 13 | 14 | 2:创建外部表 15 | create external table external_student( 16 | id int, 17 | name string, 18 | age int) 19 | comment 'this is student message table' 20 | row format delimited fields terminated by '\t' 21 | location "/user/hive/external"; 22 | 23 | #加载数据 24 | 直接将源文件放在外部表的目下即可 25 | hdfs dfs -put /home/thinkgamer/MyCode/hive/external_student /user/hive/external 26 | 这种加载方式常常用于当hdfs上有一些历史数据,而我们需要在这些数据上做一些hive的操作时使用。这种方式避免了数据拷贝开销 27 | 28 | 29 | 3:创建copy_student表,并从student表中导入数据 30 | create table copy_student( 31 | id int, 32 | name string, 33 | age int) 34 | comment 'this is student message table' 35 | row format delimited fields terminated by '\t'; 36 | 37 | 导入数据 38 | from student stu insert overwrite table copy_student select *; 39 | 40 | 4:创建复杂类型的表 41 | Create table complex_student(stu_mess ARRAY, 42 | stu_score MAP, 43 | 
stu_friend STRUCT) 44 | comment 'this is complex_student message table' 45 | row format delimited fields terminated by '\t' 46 | COLLECTION ITEMS TERMINATED BY ',' 47 | MAP KEYS TERMINATED BY ':'; 48 | #修改表名字 49 | alter table complex rename to complex_student; 50 | #加载数据 51 | load data local inpath "/home/thinkgamer/MyCode/hive/complex_student" into table complex_student; 52 | 53 | #截断表 :从表或者表分区删除所有行,不指定分区,将截断表中的所有分区,也可以一次指定多个分区,截断多个分区。 54 | truncate table complex_student; 55 | 56 | #查询示例 57 | select stu_mess[0],stu_score["chinese"],stu_friend.a from complex_student; 58 | 结果:thinkgamer 50 cyan 59 | 60 | 61 | 5:创建分区表partition_student 62 | create table partition_student( 63 | id int, 64 | name string, 65 | age int) 66 | comment 'this is student message table' 67 | Partitioned by (grade string,class string) 68 | row format delimited fields terminated by "\t"; 69 | #加载数据 70 | load data local inpath "/home/thinkgamer/MyCode/hive/partiton_student" into table partition_student partition (grade="2013", class="34010301"); 71 | load data local inpath "/home/thinkgamer/MyCode/hive/partiton_student2" into table partition_student partition (grade="2013", class="34010302"); 72 | 73 | 6:桶 74 | 创建临时表 75 | create table student_tmp( 76 | id int, 77 | name string, 78 | age int) 79 | comment 'this is student message table' 80 | row format delimited fields terminated by '\t'; 81 | 82 | 加载数据: 83 | load data local inpath '/home/thinkgamer/MyCode/hive/student.txt' into table student_tmp; 84 | 85 | 创建指定桶的个数的表student_bucket 86 | create table student_bucket(id int, 87 | name string, 88 | age int) 89 | clustered by(id) sorted by(age) into 2 buckets 90 | row format delimited fields terminated by '\t'; 91 | 92 | 设置环境变量: 93 | set hive.enforce.bucketing = true; 94 | 95 | 96 | -------------------------------------------------------------------------------- /Hive/hiveTableExample/partiton_student: -------------------------------------------------------------------------------- 1 | 1 WEEW 23 2 | 2 QVCD 32 3 | 3 sdfw 43 4 | 4 rfwe 12 5 | -------------------------------------------------------------------------------- /Hive/hiveTableExample/partiton_student 2: -------------------------------------------------------------------------------- 1 | 5 hack 43 2 | 6 spring 54 3 | 7 cyan 23 4 | 8 thinkgamer 43 5 | -------------------------------------------------------------------------------- /Hive/hiveTableExample/partiton_student 2~: -------------------------------------------------------------------------------- 1 | 5 hack 43 2 | 6 spring 54 3 | 7 cyan 23 4 | 8 thinkgamer 43 5 | -------------------------------------------------------------------------------- /Hive/hiveTableExample/partiton_student2: -------------------------------------------------------------------------------- 1 | 5 hack 43 2 | 6 spring 54 3 | 7 cyan 23 4 | 8 thinkgamer 43 5 | -------------------------------------------------------------------------------- /Hive/hiveTableExample/partiton_student~: -------------------------------------------------------------------------------- 1 | 1 WEEW 23 2 | 2 QVCD 32 3 | 3 sdfw 43 4 | 4 rfwe 12 5 | -------------------------------------------------------------------------------- /Hive/hiveTableExample/student.txt: -------------------------------------------------------------------------------- 1 | 1 WEEW 23 2 | 2 QVCD 32 3 | 3 sdfw 43 4 | 4 rfwe 12 5 | -------------------------------------------------------------------------------- /Hive/hiveTableExample/student.txt~: 
-------------------------------------------------------------------------------- 1 | 1 WEEW 23 2 | 2 QVCD 32 3 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Graph/BFS.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinkgamer/Hadoop-Spark-Learning/46f24ae930fc6d426ad14989cb6dbad1e7966d8e/Java/Dataguru算法导论/Graph/BFS.java -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Graph/DFS.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinkgamer/Hadoop-Spark-Learning/46f24ae930fc6d426ad14989cb6dbad1e7966d8e/Java/Dataguru算法导论/Graph/DFS.java -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Graph/Dijkstra.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinkgamer/Hadoop-Spark-Learning/46f24ae930fc6d426ad14989cb6dbad1e7966d8e/Java/Dataguru算法导论/Graph/Dijkstra.java -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Graph/GraphTest.java: -------------------------------------------------------------------------------- 1 | package Graph; 2 | 3 | import java.util.Scanner; 4 | 5 | /* 6 | * 定义图的结构 7 | */ 8 | class Graph { 9 | static final int MaxNum=20; //最大节点数目 10 | static final int MaxValue=65535; 11 | char[] Vertex = new char[MaxNum]; //定义数组,保存顶点信息 12 | 13 | int GType; //图的类型0:无向图 1:有向图 14 | int VertxNum; //顶点的数量 15 | int EdgeNum; //边的数量 16 | 17 | int[][] EdgeWeight = new int[MaxNum][MaxNum]; //定义矩阵保存顶点信息 18 | int[] isTrav = new int[MaxNum]; //遍历标志 19 | 20 | } 21 | 22 | public class GraphTest { 23 | 24 | /** 25 | * @param args 26 | * Author:thinkgamer 27 | */ 28 | static Scanner scan = new Scanner(System.in); 29 | 30 | //创建邻接矩阵图 31 | static void createGraph(Graph g){ 32 | int i , j , k; 33 | int weight; //权 34 | char EstartV, EndV; //边的起始顶点 35 | 36 | System.out.println("输入途中各顶点的信息"); 37 | for(i=0; i < g.VertxNum; i ++) 38 | { 39 | System.out.println("第" + (i+1) + "个顶点"); 40 | g.Vertex[i] = (scan.next().toCharArray() )[0]; 41 | } 42 | System.out.println("输入构成个遍的顶点和权值"); 43 | for(k=0;k" + g.Vertex[n]); //输出节点数据 91 | //添加处理节点的操作 92 | for(i = 0; i< g.VertxNum; i++) 93 | { 94 | //if(g.EdgeWeight[n][i] != g.MaxValue && g.isTrav[n] == 0) 纠错为 下边一行,感谢网友http://blog.csdn.net/ZyManTou 提示 95 | if(g.EdgeWeight[n][i] != g.MaxValue && g.isTrav[i] == 0) 96 | { 97 | DeepTraOne(g, i); //递归进行遍历 98 | } 99 | } 100 | } 101 | 102 | //深度优先遍历 103 | static void DeepTraGraph(Graph g){ 104 | int i; 105 | for(i = 0; i< g.VertxNum; i++) 106 | { 107 | g.isTrav[i]= 0; 108 | } 109 | System.out.println("深度优先遍历:"); 110 | for(i = 0; i< g.VertxNum ; i++) 111 | { 112 | if(g.isTrav[i] == 0) 113 | DeepTraOne(g,i); 114 | } 115 | System.out.println(); 116 | } 117 | 118 | public static void main(String[] args) { 119 | // TODO Auto-generated method stub 120 | Graph g = new Graph(); 121 | System.out.println("输出生成图的类型:"); 122 | g.GType = scan.nextInt(); //图的种类 123 | 124 | System.out.println("输入图的顶点数量:"); 125 | g.VertxNum = scan.nextInt(); 126 | 127 | System.out.println("输入图的边数量:"); 128 | g.EdgeNum = scan.nextInt(); 129 | 130 | clearGraph(g); //清空图 131 | createGraph(g); //生成邻接表结构的图 132 | System.out.println("该图的邻接矩阵数据如下:"); 133 | OutGraph(g); //输出图 134 | DeepTraGraph(g); //深度优先遍历图 135 | } 136 | 137 | } 138 | 
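/*
 * A minimal, non-interactive sketch of the same adjacency-matrix DFS idea used by
 * GraphTest above: the graph is hard-coded instead of read from the console, so it can
 * be run directly. Class, field and edge choices here are illustrative only.
 */
class DfsSketch {
	static final int NO_EDGE = 65535;              // same "no edge" sentinel as Graph.MaxValue
	static char[] vertex = {'A', 'B', 'C', 'D'};
	static int[][] weight = new int[4][4];
	static int[] visited = new int[4];

	public static void main(String[] args) {
		for (int i = 0; i < 4; i++)                // start with no edges at all
			for (int j = 0; j < 4; j++)
				weight[i][j] = NO_EDGE;
		addEdge(0, 1);                             // undirected edges A-B, A-C, B-D
		addEdge(0, 2);
		addEdge(1, 3);

		System.out.println("DFS order:");
		for (int i = 0; i < 4; i++)                // restart from every still-unvisited vertex
			if (visited[i] == 0)
				dfs(i);
		System.out.println();                      // prints: ->A->B->D->C
	}

	static void addEdge(int u, int v) {
		weight[u][v] = 1;
		weight[v][u] = 1;
	}

	static void dfs(int n) {
		visited[n] = 1;
		System.out.print("->" + vertex[n]);        // visit the node, then recurse into unvisited neighbours
		for (int i = 0; i < 4; i++)
			if (weight[n][i] != NO_EDGE && visited[i] == 0)
				dfs(i);
	}
}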
-------------------------------------------------------------------------------- /Java/Dataguru算法导论/Hash/hash.java: -------------------------------------------------------------------------------- 1 | package Hash; 2 | /* 3 | * Hash函数介绍 4 | * 除下面介绍的集中hash函数外还有取余散列法(m一般选择较大的素数,例如701) h(k) = k mod m 5 | * 乘法散列法(m选择2的计算机的位数(64位或者32位),A为sqrt(5)-1 = 0.618)h(k) = m(kA mod 1 ) 6 | */ 7 | 8 | public class hash { 9 | 10 | //1:RS 11 | public static long RSHash(String str) 12 | { 13 | int b = 378551; 14 | int a = 63689; 15 | long hash = 0; 16 | for(int i = 0; i < str.length(); i++) 17 | { 18 | hash = hash * a + str.charAt(i); 19 | a = a * b; 20 | } 21 | return hash; 22 | } 23 | 24 | //2:JS Justin Sobel写的一个位操作的哈希函数。 25 | public static long JSHash(String str) 26 | { 27 | long hash = 1315423911; 28 | for(int i = 0; i < str.length(); i++) 29 | { 30 | hash ^= ((hash << 5) + str.charAt(i) + (hash >> 2)); 31 | } 32 | return hash; 33 | } 34 | 35 | //3:PJW 该散列算法是基于贝尔实验室的彼得J温伯格的的研究。在Compilers一书中(原则,技术和工具),建议采用这个算法的散列函数的哈希方法。 36 | public static long PJWHash(String str) 37 | { 38 | long BitsInUnsignedInt = (long)(4 * 8); 39 | long ThreeQuarters = (long)((BitsInUnsignedInt * 3) / 4); 40 | long OneEighth = (long)(BitsInUnsignedInt / 8); 41 | long HighBits = (long)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth); 42 | long hash = 0; 43 | long test = 0; 44 | for(int i = 0; i < str.length(); i++) 45 | { 46 | hash = (hash << OneEighth) + str.charAt(i); 47 | if((test = hash & HighBits) != 0) 48 | { 49 | hash = (( hash ^ (test >> ThreeQuarters)) & (~HighBits)); 50 | } 51 | } 52 | return hash; 53 | } 54 | 55 | //4:ELF 和PJW很相似,在Unix系统中使用的较多。 56 | public static long ELFHash(String str) 57 | { 58 | long hash = 0; 59 | long x = 0; 60 | for(int i = 0; i < str.length(); i++) 61 | { 62 | hash = (hash << 4) + str.charAt(i); 63 | if((x = hash & 0xF0000000L) != 0) 64 | { 65 | hash ^= (x >> 24); 66 | } 67 | hash &= ~x; 68 | } 69 | return hash; 70 | } 71 | 72 | //5:BKDR 73 | /* 74 | * 这个算法来自Brian Kernighan 和 Dennis Ritchie的 The C Programming Language。 75 | * 这是一个很简单的哈希算法,使用了一系列奇怪的数字,形式如31,3131,31...31,看上去和DJB算法很相似 76 | */ 77 | public static long BKDRHash(String str) 78 | { 79 | long seed = 131; // 31 131 1313 13131 131313 etc.. 
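		// Each character is folded in as hash = hash * seed + ch,
		// e.g. BKDRHash("ab") = 'a' * 131 + 'b' = 97 * 131 + 98 = 12805.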
80 | long hash = 0; 81 | for(int i = 0; i < str.length(); i++) 82 | { 83 | hash = (hash * seed) + str.charAt(i); 84 | } 85 | return hash; 86 | } 87 | 88 | //6:SDBM 这个算法在开源的SDBM中使用,似乎对很多不同类型的数据都能得到不错的分布。 89 | public static long SDBMHash(String str) 90 | { 91 | long hash = 0; 92 | for(int i = 0; i < str.length(); i++) 93 | { 94 | hash = str.charAt(i) + (hash << 6) + (hash << 16) - hash; 95 | } 96 | return hash; 97 | } 98 | 99 | //7:DJB 这个算法是Daniel J.Bernstein 教授发明的,是目前公布的最有效的哈希函数 100 | public static long DJBHash(String str) 101 | { 102 | long hash = 5381; 103 | for(int i = 0; i < str.length(); i++) 104 | { 105 | hash = ((hash << 5) + hash) + str.charAt(i); 106 | } 107 | return hash; 108 | } 109 | 110 | //8:DEK 由伟大的Knuth在《编程的艺术 第三卷》的第六章排序和搜索中给出。 111 | public static long DEKHash(String str) 112 | { 113 | long hash = str.length(); 114 | for(int i = 0; i < str.length(); i++) 115 | { 116 | hash = ((hash << 5) ^ (hash >> 27)) ^ str.charAt(i); 117 | } 118 | return hash; 119 | } 120 | 121 | //9:AP 这是本文作者Arash Partow贡献的一个哈希函数,继承了上面以旋转以为和加操作。代数描述:AP 122 | public static long APHash(String str) 123 | { 124 | long hash = 0xAAAAAAAA; 125 | for(int i = 0; i < str.length(); i++) 126 | { 127 | if ((i & 1) == 0) 128 | { 129 | hash ^= ((hash << 7) ^ str.charAt(i) * (hash >> 3)); 130 | } 131 | else 132 | { 133 | hash ^= (~((hash << 11) + str.charAt(i) ^ (hash >> 5))); 134 | } 135 | } 136 | return hash; 137 | } 138 | 139 | //主函数 140 | public static void main(String[] args) { 141 | String str = "thinkgamer"; 142 | System.out.println("thinkgamer 的 RSHash:" + RSHash(str)); 143 | System.out.println("thinkgamer 的 JSHash:" + JSHash(str)); 144 | System.out.println("thinkgamer 的 PJWHash:" + PJWHash(str)); 145 | System.out.println("thinkgamer 的 ELFHash:" + ELFHash(str)); 146 | System.out.println("thinkgamer 的 BKDRHash:" + BKDRHash(str)); 147 | System.out.println("thinkgamer 的 SDBMHash:" + SDBMHash(str)); 148 | System.out.println("thinkgamer 的 DJBHash:" + DJBHash(str)); 149 | System.out.println("thinkgamer 的 DEKHash:" + DEKHash(str)); 150 | System.out.println("thinkgamer 的 APHash:" + APHash(str)); 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Link/DoubleLink.java: -------------------------------------------------------------------------------- 1 | package Link; 2 | 3 | import java.util.Scanner; 4 | 5 | class Data{ //定义链表的一个节点 6 | String key; //节点的关键字,唯一 7 | String name; 8 | int age; 9 | } 10 | 11 | public class DoubleLink { 12 | 13 | 14 | int flag; //输入选择值 15 | Scanner scan = new Scanner(System.in); 16 | Data data = new Data(); 17 | DoubleLink nextNode; //后继节点 18 | DoubleLink priorNode; //前驱节点 19 | 20 | //链表添加节点 21 | DoubleLink addNode(DoubleLink head, String priorKey, String nextKey, Data nodeData){ 22 | 23 | DoubleLink node=null, htemp=null; 24 | if((node = new DoubleLink()) == null) 25 | System.out.println("内存空间分配失败"); 26 | if(head== null) //如果head为空 27 | { 28 | System.out.println("当前链表为空,是否将当前节点当作头节点?\n0:否\t1:是"); 29 | 30 | node.data=nodeData; 31 | node.nextNode=null; 32 | node.priorNode=null; 33 | flag = scan.nextInt(); 34 | switch(flag) 35 | { 36 | case 0: 37 | break; 38 | case 1: 39 | head=node; 40 | break; 41 | default: 42 | System.out.println("你输入的数据不合法");; 43 | } 44 | } //如果head不为空 45 | else{ 46 | if(linkFindNode(head, priorKey,nextKey,nodeData)) 47 | System.out.println("插入成功"); 48 | else 49 | System.out.println("插入失败(原因可能是你输入的前驱和后继即诶但均不存在)"); 50 | } 51 | 52 | return head; 53 | } 54 | 55 | //查找并插入节点 56 | boolean linkFindNode(DoubleLink 
head, String priorKey, String nextKey,Data nodeData) { 57 | // TODO Auto-generated method stub 58 | DoubleLink htemp=null,node=null; 59 | 60 | if( (node = new DoubleLink()) == null ) 61 | { 62 | System.out.println("内存分配失败"); 63 | return false; 64 | } 65 | //将传进来的值赋值给node 66 | node.data = nodeData; 67 | node.nextNode = null; 68 | node.priorNode=null; 69 | //两大类情况 70 | htemp = head; 71 | while(htemp != null) 72 | { 73 | if(htemp.data.key.equals(priorKey)) //前驱节点存在 74 | { 75 | if(htemp.nextNode == null) //该节点的后继节点为空,说明该节点为头节点 76 | { 77 | System.out.println("你输入的后继节点不存在,前驱节点为头节点,是否插入在其后面?\n 1:是 \t 0 :否 "); 78 | flag = scan.nextInt(); 79 | if(flag == 0) 80 | break; 81 | else if(flag==1) 82 | { 83 | htemp.nextNode = node; //将查找到的节点的后继节点指向node 84 | node.nextNode = null; 85 | node.priorNode = htemp; 86 | 87 | return true; 88 | } 89 | else 90 | System.out.println("你输入的数字不合法!!!"); 91 | } 92 | else //后继节点不为空 93 | { 94 | if(htemp.nextNode.data.key.equals(nextKey)) //存在的后继节点与nextKey相同。相同执行if 95 | { 96 | node.nextNode = htemp.nextNode; 97 | htemp.nextNode.priorNode = node; 98 | 99 | htemp.nextNode = node; 100 | node.priorNode = htemp; 101 | return true; 102 | 103 | } 104 | else //不同执行else 105 | { 106 | htemp = htemp.nextNode; //若当前节点没找到,遍历下一个节点 107 | } 108 | } 109 | } 110 | else //前驱节点不存在,后驱节点存在 111 | { 112 | if(htemp.data.key.equals(nextKey)) //如果当前节点与nextKey相同 113 | { 114 | if(htemp.nextNode==null) //如果后继节点为空,即当前节点为尾节点 115 | { 116 | System.out.println("你输入的前驱节点不存在,后继节点为头节点,是否插入在其前面?\n 1:是 \t 0 :否 "); 117 | flag = scan.nextInt(); 118 | if(flag == 0) 119 | break; 120 | else if(flag==1) 121 | { 122 | htemp.priorNode = node; 123 | node.nextNode = htemp; 124 | 125 | node.priorNode=null; 126 | return true; 127 | } 128 | else 129 | System.out.println("你输入的数字不合法!!!"); 130 | } 131 | else //如果当前节点的后继节点不为空,则执行下一个节点 132 | { 133 | htemp = htemp.nextNode; //若当前节点没找到,遍历下一个节点 134 | } 135 | } 136 | else 137 | htemp = htemp.nextNode; //若当前节点没找到,遍历下一个节点 138 | } 139 | } 140 | return false; 141 | } 142 | 143 | //输出节点 144 | public void OutputLinkNode(DoubleLink head) 145 | { 146 | if(head == null) 147 | System.out.println("当前链表为空"); 148 | else{ 149 | System.out.println("输入的链表数据如下:"); 150 | DoubleLink htemp; 151 | htemp = head; 152 | while(htemp!=null) 153 | { 154 | System.out.println(htemp.data.key + "\t" + htemp.data.name + "\t" + htemp.data.age); 155 | htemp= htemp.nextNode; 156 | } 157 | } 158 | System.out.println(); 159 | } 160 | 161 | //输出链表的深度 162 | int LinkDepth(DoubleLink head) 163 | { 164 | int sum = 0; 165 | DoubleLink htemp = head; 166 | while(htemp!=null) 167 | { 168 | sum ++; 169 | htemp = htemp.nextNode; 170 | } 171 | return sum; 172 | } 173 | 174 | //查找节点 175 | DoubleLink FindLink(DoubleLink head, String findKey) 176 | { 177 | DoubleLink htemp=head; 178 | while(htemp!=null) 179 | { 180 | if(htemp.data.key.equals(findKey)) 181 | return htemp; 182 | htemp = htemp.nextNode; 183 | } 184 | return null; 185 | } 186 | 187 | //删除节点 188 | DoubleLink DeleteNode(DoubleLink head, String deleteKey) 189 | { 190 | DoubleLink htemp = head; 191 | while(htemp!=null) 192 | { 193 | if(htemp.data.key.equals(deleteKey)) 194 | { 195 | if(htemp.priorNode==null) //如果是头节点 196 | { 197 | return htemp.nextNode; 198 | } 199 | else if (htemp.nextNode==null) //如果是尾节点 200 | { 201 | htemp.priorNode.nextNode=null; 202 | htemp.priorNode=null; 203 | return head; 204 | } 205 | else //如果是中间 206 | { 207 | htemp.priorNode.nextNode=htemp.nextNode; 208 | htemp.nextNode.priorNode = htemp.priorNode; 209 | return head; 210 | } 211 | } 212 | else 213 | 
htemp = htemp.nextNode; 214 | } 215 | System.out.println("你要删除的节点不存在!"); 216 | return head; 217 | } 218 | 219 | } 220 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Link/DoubleLinkTest.java: -------------------------------------------------------------------------------- 1 | package Link; 2 | 3 | import java.util.Scanner; 4 | 5 | public class DoubleLinkTest { 6 | 7 | public static void main(String[] args) { 8 | 9 | DoubleLink node=null, head=null; 10 | DoubleLink dlink = new DoubleLink(); //声明一个双向链表对象 11 | Scanner scan = new Scanner(System.in); 12 | 13 | System.out.println("双向链表测试开始...."); 14 | do{ 15 | System.out.println("请输入插入节点的关键字,姓名和年龄,格式为:关键字 姓名 年龄"); 16 | Data data = new Data(); 17 | data.key = scan.next(); 18 | data.name = scan.next(); 19 | data.age = scan.nextInt(); 20 | 21 | if(data.key.contains("0")) //循环插入节点,直到插入的为0时结束 22 | break; 23 | else 24 | { 25 | System.out.println("请输入插入节点的前驱节点和后继节点,格式为 前驱节点 后继节点"); 26 | String priorKey = scan.next(); 27 | String nextKey = scan.next(); 28 | 29 | head = dlink.addNode(head, priorKey, nextKey, data); //添加节点 30 | dlink.OutputLinkNode(head); //输出链表 31 | } 32 | }while(true); 33 | 34 | //输出链表的深度 35 | System.out.println("该链表的深度为:" + dlink.LinkDepth(head)); 36 | 37 | //查找链表中的某个节点 38 | System.out.println("请输入要查找的节点的关键字..."); 39 | String findKey = scan.next(); 40 | node = dlink.FindLink(head, findKey); 41 | if(node==null) 42 | System.out.println("你所查找的节点不存在!"); 43 | else 44 | System.out.println("该节点的值为:" + node.data.key + "\t" + node.data.name + "\t" + node.data.age); 45 | 46 | //删除节点值 47 | System.out.println("请输入要删除的节点的关键字..."); 48 | String deleteKey = scan.next(); 49 | node = dlink.DeleteNode(head, deleteKey); 50 | if(node == null) 51 | System.out.println("删除节点后的链表为空,其深度为:" + 0); 52 | else 53 | { 54 | System.out.println("删除后的链表为:"); 55 | dlink.OutputLinkNode(head); 56 | System.out.println("删除节点后链表的深度为:" + dlink.LinkDepth(head)); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Link/Link.java: -------------------------------------------------------------------------------- 1 | package Link; 2 | 3 | class DATA{ //定义链表的一个节点 4 | String key; //节点的关键字 5 | String name; 6 | int age; 7 | } 8 | 9 | public class Link { //定义链表结构 10 | 11 | DATA nodeData = new DATA(); //声明一个节点 12 | Link nextNode; //指向下一个节点的指针 13 | 14 | 15 | //添加节点 16 | Link linkAddEnd(Link head, DATA nodeData) 17 | { 18 | Link node, hTemp; 19 | if( (node = new Link()) ==null) //如果内存空间分配失败,则返回为空 20 | { 21 | System.out.println("内存空间分配失败!"); 22 | return null; 23 | } 24 | else 25 | { 26 | node.nodeData = nodeData; 27 | node.nextNode = null; 28 | if(head == null) //如果头节点为空,则把当前节点赋给head,并返回 29 | { 30 | head = node; 31 | return head; 32 | } 33 | hTemp = head; //如果头节点不为空 34 | while(hTemp.nextNode!=null) //查找链表的末尾 35 | { 36 | hTemp = hTemp.nextNode; 37 | } 38 | hTemp.nextNode = node; 39 | return head; 40 | } 41 | } 42 | 43 | //插入头节点 44 | Link linkAddFirst(Link head, DATA nodeData) 45 | { 46 | Link node; 47 | if((node=new Link()) == null ) //如果内存空间分配失败,则返回为空 48 | { 49 | System.out.println("内存分配失败"); 50 | return null; 51 | } 52 | else 53 | { 54 | node.nodeData = nodeData; 55 | node.nextNode = head; 56 | head = node; 57 | return head; 58 | } 59 | } 60 | 61 | //查找节点 62 | Link linkFindNode(Link head, String key) 63 | { 64 | Link hTemp; 65 | hTemp = head; 66 | while(hTemp!=null) //若节点有效,则进行查找 67 | { 68 | if(hTemp.nodeData.key.compareTo(key) == 0) //若节点的关键字与传入的关键字相同 69 | { 70 | 
return hTemp; 71 | } 72 | hTemp = hTemp.nextNode; //处理下一个节点 73 | } 74 | return null; 75 | } 76 | 77 | //插入节点 78 | Link linkInsertNode(Link head, String findKey,DATA nodeData) 79 | { 80 | Link node,hTemp; 81 | if((node = new Link() ) == null ) //分配内存失败,则返回 82 | { 83 | System.out.println("分配内存失败..."); 84 | return null; 85 | } 86 | node.nodeData = nodeData; //保存当前集节点信息 87 | hTemp = linkFindNode(head, findKey); //查找要插入的节点 88 | if(hTemp != null) 89 | { 90 | node.nextNode = hTemp.nextNode; 91 | hTemp.nextNode = node; 92 | } 93 | else 94 | { 95 | System.out.println("未找到正确的插入位置........."); 96 | } 97 | return head; //返回头引用 98 | } 99 | 100 | //删除节点 101 | int linkDeleteNode(Link head, String key) 102 | { 103 | Link node,hTemp; 104 | hTemp = head; 105 | node = head; 106 | while(hTemp != null ) 107 | { 108 | if(hTemp.nodeData.key.compareTo(key) == 0) //若找到关键字,则删除 109 | { 110 | node.nextNode = hTemp.nextNode; 111 | hTemp = null; 112 | return 1; 113 | } 114 | else //跳到下一个节点 115 | { 116 | node = hTemp; 117 | hTemp = hTemp.nextNode; 118 | } 119 | } 120 | return 0; 121 | } 122 | 123 | //计算链表长度 124 | int linkLength(Link head) 125 | { 126 | Link hTemp; 127 | hTemp = head; 128 | int num = 0; 129 | while(hTemp!=null) 130 | { 131 | num ++ ; 132 | hTemp = hTemp.nextNode; 133 | } 134 | return num; 135 | } 136 | 137 | //显示所有节点 138 | void linkShow(Link head) 139 | { 140 | Link hTemp; 141 | DATA nodeData; 142 | hTemp = head; 143 | System.out.printf("当前链表共有 %d 个节点,链表所有的数据如下:\n" , linkLength(head)); 144 | while(hTemp!=null) 145 | { 146 | nodeData = hTemp.nodeData; //获取当前的节点数据 147 | System.out.printf("节点(%s %s %d)\n",nodeData.key,nodeData.name,nodeData.age); 148 | hTemp = hTemp.nextNode; 149 | } 150 | } 151 | 152 | } -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Link/linkTest.java: -------------------------------------------------------------------------------- 1 | package Link; 2 | 3 | import java.util.Scanner; 4 | 5 | public class linkTest { 6 | 7 | public static void main(String[] args) { 8 | Link node = null , head=null; 9 | Link link = new Link(); 10 | String key, findKey; 11 | Scanner input = new Scanner(System.in); 12 | 13 | System.out.printf("链表测试开始,先输出链表中的数据,格式为:关键字 姓名 年龄\n"); 14 | do 15 | { //循环插入节点,知道输入的key 为0 结束 16 | DATA nodeData = new DATA(); 17 | nodeData.key = input.next(); 18 | if(nodeData.key.equals("0")) 19 | { 20 | break; 21 | } 22 | else 23 | { 24 | nodeData.name = input.next(); 25 | nodeData.age = input.nextInt(); 26 | head = link.linkAddEnd(head, nodeData); //在链表尾部添加节点 27 | } 28 | }while(true); 29 | link.linkShow(head); //显示所有节点 30 | 31 | System.out.printf("\n演示插入节点,输入插入位置的关键字:"); 32 | findKey = input.next(); //输入插入的关键字 33 | System.out.println("输入插入节点的数据(关键字 姓名 年龄)"); 34 | DATA nodeData = new DATA(); //输入节点的元素值 35 | nodeData.key = input.next(); 36 | nodeData.name = input.next(); 37 | nodeData.age = input.nextInt(); 38 | head = link.linkInsertNode(head, findKey, nodeData); //调用插入函数 39 | link.linkShow(head); //显示所有节点 40 | 41 | System.out.println("演示删除节点,输入要删除的关键字:"); 42 | key = input.next(); 43 | link.linkDeleteNode(head, key); //调用删除节点的函数 44 | link.linkShow(head); //显示所有节点 45 | 46 | System.out.println("演示在链表中差找,输入要查找的关键字:"); 47 | key = input.next(); 48 | node = link.linkFindNode(head, key); //调用查找函数,返回节点引用 49 | if(node!=null) 50 | { 51 | nodeData = node.nodeData; //获取节点的数据 52 | System.out.printf("关键字 %s 对应的节点数据为 (%s %s %s)\n", key,nodeData.key,nodeData.name,nodeData.age); 53 | } 54 | else 55 | { 56 | System.out.printf("在链表中为查找的为%s 的关键字 \n" , 
key); 57 | } 58 | 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Matrix/matrixCheng.java: -------------------------------------------------------------------------------- 1 | package Matrix; 2 | 3 | /* 4 | * 方阵相乘 5 | * strassen,矩阵分块思想 6 | */ 7 | public class matrixCheng { 8 | //用于计算的两个数组 9 | static int[][] a = { 10 | {1,2,3}, 11 | {2,3,4}, 12 | {3,4,5} 13 | }; 14 | static int[][] b = { 15 | {3,4,5}, 16 | {4,5,6}, 17 | {5,6,7} 18 | }; 19 | static int[][] c = { {0,0,0}, {0,0,0}, {0,0,0} }; //用来存放a 与 b相乘的值 20 | 21 | public static void main(String[] args) { 22 | 23 | //正常计算规则计算 24 | normalCheng(); 25 | } 26 | 27 | private static void normalCheng() { 28 | // TODO Auto-generated method stub 29 | for(int line_a = 0 ; line_a< a.length; line_a ++ ) 30 | { 31 | for(int line_b =0 ; line_b< b.length; line_b++) 32 | { 33 | c[line_a][line_b] = 0; 34 | for (int k = 0 ;k< b[0].length; k ++) 35 | { 36 | c[line_a][line_b]= c[line_a][line_b] + a[line_a][k] * b[k][line_b]; 37 | } 38 | } 39 | } 40 | printMatrix(); 41 | 42 | } 43 | 44 | //打印出得到的乘积 45 | private static void printMatrix() { 46 | // TODO Auto-generated method stub 47 | //打印a 48 | System.out.println("a 矩阵:"); 49 | for(int i =0; i< a.length; i++) 50 | { 51 | for(int j =0;j < a[0].length; j ++) 52 | System.out.print( a[i][j] + "\t"); 53 | System.out.println(); 54 | } 55 | //打印b 56 | System.out.println("b 矩阵:"); 57 | for(int i =0; i< b.length; i++) 58 | { 59 | for(int j =0;j < b[0].length; j ++) 60 | System.out.print( b[i][j] + "\t"); 61 | System.out.println(); 62 | } 63 | //打印乘积矩阵 64 | System.out.println("乘积矩阵:"); 65 | for(int i =0 ; i< c.length; i ++) 66 | { 67 | for (int j =0;j max) 37 | { 38 | max = sum; 39 | startIndex = i; 40 | endIndex = j; 41 | } 42 | } 43 | } 44 | System.out.println("Max sum is :" + max); //输出最大子数组和 45 | printMaxArr(startIndex, endIndex); //输出最大子数组 46 | } 47 | 48 | //算法复杂度 n 49 | private static void findMaxArr3() 50 | { 51 | // TODO Auto-generated method stub 52 | int max = arr[0]; 53 | int sum = 0; 54 | int startIndex = 0; //记录最大子串的起始位置 55 | int endIndex = 0 ; // 记录最大子串的结束位置 56 | for ( int i =0 ; i< maxIndex; i ++) 57 | { 58 | if ( sum >= 0) 59 | { 60 | sum += arr[i]; 61 | } 62 | else 63 | { 64 | sum = arr[i]; 65 | startIndex = i; 66 | } 67 | if(sum > max) 68 | { 69 | max = sum; 70 | endIndex = i; 71 | } 72 | } 73 | System.out.println("Max sum is :" + max); 74 | printMaxArr(startIndex, endIndex); 75 | 76 | } 77 | 78 | //输出最大子数组 79 | private static void printMaxArr(int startIndex, int endIndex) { 80 | // TODO Auto-generated method stub 81 | for(int i =startIndex ; i<= endIndex; i ++) 82 | System.out.print( arr[i] + "\t"); 83 | } 84 | 85 | 86 | } 87 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Queue/Queue.java: -------------------------------------------------------------------------------- 1 | package Queue; 2 | 3 | /* 4 | * 使用java构建队列,并模拟实现队列的入队和出对方法 5 | */ 6 | 7 | public class Queue { //队列类 8 | 9 | private int maxSize; //定义队列的长度 10 | private int[] arrQueue; //队列 11 | private int rear; //定义队列的尾指针 12 | private int front; //定义队列的头指针 13 | private int empty; //元素的个数 14 | 15 | public Queue(int s) //初始化构造函数 16 | { 17 | maxSize = s; 18 | arrQueue = new int[s]; 19 | rear = -1; 20 | front=0; 21 | empty = 0; 22 | } 23 | 24 | //实现插入方法 25 | public void insert(int m) 26 | { 27 | if(rear == maxSize-1) //处理循环 28 | rear = -1; 29 | arrQueue[++rear] = m; //对尾指针加一,把值放在队列结尾 30 | empty++; //队列元素个数加1 31 | 
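		// Note: insert() does not check isFull() and remove() does not check isEmpty();
		// inserting into a full queue silently overwrites the oldest slot and remove() on an
		// empty queue returns stale data, so callers are expected to test those flags first.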
System.out.println("队列入队元素 为:" + m); 32 | } 33 | 34 | //实现出栈的方法,即取得队列的头元素 35 | public int remove() 36 | { 37 | int temp = arrQueue[front++]; //将栈顶元素赋值给temp,栈顶指针加1 38 | if(front == maxSize) //处理循环 39 | front = 0; 40 | empty--; //元素个数-1 41 | return temp; 42 | } 43 | 44 | //判断队列是否为空 45 | public boolean isEmpty() 46 | { 47 | return (empty==0); 48 | } 49 | 50 | //判断对列是否为满 51 | public boolean isFull() 52 | { 53 | return (empty == maxSize); 54 | } 55 | 56 | //返回队列长度 57 | public int qLong() 58 | { 59 | return empty; 60 | } 61 | 62 | public static void main(String[] args) { 63 | Queue q = new Queue(5); //初始化队列为5个元素 64 | 65 | q.insert(1); 66 | q.insert(2); 67 | q.insert(3); 68 | q.insert(4); 69 | q.insert(5); 70 | 71 | int t1 = q.remove(); 72 | System.out.println("队列元素出队:" + t1); 73 | int t2 = q.remove(); 74 | System.out.println("队列元素出队:" + t2); 75 | 76 | System.out.println("队列是否为空:" + q.isEmpty()); 77 | System.out.println("队列是否为满:" + q.isFull()); 78 | System.out.println("队列的长度:" + q.qLong()); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Statck/Statck1.java: -------------------------------------------------------------------------------- 1 | package Statck; 2 | /* 3 | * 使用java构建栈,并模拟实现栈的入栈和出栈方法 4 | * 使用数组实现 5 | */ 6 | 7 | public class Statck1 { 8 | 9 | private int maxSize; //栈的最多元素数 10 | private int top; //栈顶指针 11 | private int len; //栈的深度 12 | private int[] arrStack; // 模拟栈 13 | 14 | //栈的初始化 15 | public Statck1(int s){ 16 | maxSize = s; 17 | len =0; 18 | top= -1; 19 | arrStack = new int[s]; 20 | } 21 | 22 | //获取栈的长度 23 | public int getLen(){ 24 | return len; 25 | } 26 | 27 | //获取当前栈还能插入多少个f元素 28 | public int getLeaveLen(){ 29 | return (maxSize-len); 30 | } 31 | //判断栈是否满 32 | public boolean isFull(){ 33 | return (len==maxSize); 34 | } 35 | 36 | //判断栈是否为空 37 | public boolean isEmpty(){ 38 | return (len ==0); 39 | } 40 | 41 | //元素入栈 42 | public void inStack(int s) 43 | { 44 | arrStack[++top] = s; //栈顶指针加1,入栈 45 | System.out.println("元素入栈:" + s); 46 | len ++ ;//栈深度+1 47 | } 48 | 49 | //元素出栈 50 | public int outStack() 51 | { 52 | int temp = arrStack[top--];//赋值之后减1 53 | System.out.println("元素出栈:" + temp); 54 | len--; //栈深度-1 55 | return temp; 56 | } 57 | 58 | public static void main(String[] args) { 59 | Statck1 s = new Statck1(5); 60 | 61 | s.inStack(1); 62 | s.inStack(2); 63 | s.inStack(3); 64 | s.inStack(4); 65 | s.inStack(5); 66 | 67 | s.outStack(); 68 | s.outStack(); 69 | System.out.println("栈的长度:" + s.getLen()); 70 | System.out.println("还能入栈元素个数:" + s.getLeaveLen()); 71 | System.out.println("栈的是否为空:" + s.isEmpty()); 72 | System.out.println("栈的是否为满:" + s.isFull()); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/Statck/Statck2.java: -------------------------------------------------------------------------------- 1 | package Statck; 2 | 3 | import java.util.ArrayList; 4 | import java.util.EmptyStackException; 5 | import java.util.List; 6 | 7 | /* 8 | * 使用java构建栈,并模拟实现栈的入栈和出栈方法 9 | * 使用链表实现 10 | */ 11 | 12 | public class Statck2 { 13 | 14 | private List statck = new ArrayList(); 15 | 16 | public Statck2(){ 17 | //栈的初始化 18 | } 19 | 20 | //清空栈 21 | public void clear(){ 22 | statck.clear(); 23 | System.out.println("清空栈.........."); 24 | } 25 | //判断栈是否为空 26 | public boolean isEmpty(){ 27 | return statck.isEmpty(); 28 | } 29 | //获取栈顶元素 30 | public E getTop(){ 31 | if(isEmpty()) 32 | return null; 33 | return statck.get(0); 34 | } 35 | 36 | //弹出栈操作 37 | public 
E pop(){ 38 | if (isEmpty()) 39 | throw new EmptyStackException(); 40 | System.out.println(statck.size() + "\t 出栈"); 41 | return statck.remove(statck.size() - 1); 42 | } 43 | 44 | //压入栈操作 45 | public void push(E e){ 46 | statck.add(e); 47 | System.out.println(e + "\t 入栈"); 48 | } 49 | 50 | //获取当前栈的深度 51 | public int getStatckSize(){ 52 | if(isEmpty()) 53 | throw new EmptyStackException(); 54 | return statck.size(); 55 | } 56 | 57 | public static void main(String[] args) { 58 | Statck2 s = new Statck2(); 59 | s.clear(); //清空栈 60 | System.out.println("当前栈是否为空:" + s.isEmpty()); 61 | s.push(1); 62 | s.push(2); 63 | s.push(3); 64 | 65 | s.pop(); 66 | System.out.println("当前栈的深度为:" + s.getStatckSize()); 67 | System.out.println("当前栈顶元素为:" + s.getTop()); 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/TestCode/BitTreeExample.java: -------------------------------------------------------------------------------- 1 | package TestCode; 2 | 3 | /* 4 | * 题目描述:每一次移除二叉树的所有叶子节点,有已知的移除叶子节点序列得到原本的二叉树 5 | * eg: AB 6 | * C 7 | * CAB 8 | */ 9 | public class BitTreeExample { 10 | 11 | } 12 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/TestCode/HashTableExample.java: -------------------------------------------------------------------------------- 1 | package TestCode; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.util.Hashtable; 7 | import java.util.StringTokenizer; 8 | 9 | /* 10 | * 题目描述:输入读应的几种字符串,其中一个是english,一个是外语开始是输入字典,后来是根据外语来查询字典,没有时输出"en" 11 | */ 12 | 13 | public class HashTableExample { 14 | public static void main(String[] args) throws IOException { 15 | BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in)); 16 | Hashtable table = new Hashtable(); 17 | String s = ""; 18 | String[] arr = new String[2]; 19 | while(true) 20 | { 21 | s = stdin.readLine(); 22 | if(s.equals("")) 23 | break; 24 | arr=s.split(" "); 25 | table.put(arr[1],arr[0]); 26 | } 27 | while(true) 28 | { 29 | s = stdin.readLine(); 30 | if(table.get(s) != null ) 31 | System.out.println(table.get(s)); 32 | else 33 | System.out.println("eh"); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/TestCode/fenZhiTest.java: -------------------------------------------------------------------------------- 1 | package TestCode; 2 | 3 | /* 4 | * 题目描述:给定平面上的N个点,计算任意两点的最近距离(N范围是<10000) 5 | */ 6 | 7 | public class fenZhiTest { 8 | 9 | 10 | public static void main(String[] args) { 11 | 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/TestCode/guibingTest.java: -------------------------------------------------------------------------------- 1 | package TestCode; 2 | 3 | import java.util.Arrays; 4 | /* 5 | * 问题描述 6 | * 有N个整数,A[1],A[2],A[3],....,A[N]。需要找打这样的(i,j)的数对的数量 7 | * 满足 1 <= i < j <=N, A[i] > A[j]。数据范围:1<= N <= 65537,0 <=A[i] <=10^9 8 | */ 9 | public class guibingTest { 10 | 11 | static int[] arr = { 12 | 3,4,1,5,2,6 //示例数组 13 | }; 14 | static int num = 0; //记录满足条件的对数 15 | 16 | public static void main(String[] args) { 17 | MergeSort(arr, 0, 5); 18 | System.out.println("满足条件的逆序数 共: " + num + "对"); 19 | 20 | } 21 | 22 | //归并排序寻找满足条件的对数 23 | private static void MergeSort(int[] arr, int low, int high) { 24 | // TODO Auto-generated method stub 25 | int mid = (low + high) /2; 26 | 
if(low= K2i+1 Ki>=K2i+2 为大顶堆 8 | * 此为大顶堆的代码实例,小顶堆类似 9 | */ 10 | public class duiSort { 11 | 12 | static int[] arr = { 13 | 16,7,3,20,17,8 //定义待排序数组 14 | }; 15 | public static void main(String[] args) { 16 | 17 | buildHeap();//建立大顶堆并排序 18 | System.out.println("排序好的为:" + Arrays.toString(arr)); 19 | } 20 | 21 | private static void buildHeap() { 22 | // TODO Auto-generated method stub 23 | int len = arr.length; 24 | for(int i =len/2 -1 ;i>=0;i--) //建立大顶堆 25 | { 26 | sortHeap(i,len); 27 | } 28 | System.out.println("建立好的大顶堆如下:" + Arrays.toString(arr)); 29 | for(int j = len-1; j >0; j --) //对大顶堆进行排序 30 | { 31 | swap(0,j); 32 | sortHeap(0,j); 33 | } 34 | } 35 | 36 | private static void sortHeap(int i, int len) { 37 | // TODO Auto-generated method stub 38 | int left = 2*i+1; //定义左节点 39 | int right = 2*i +2; //定义右节点 40 | int large = 0; //存放三个节点中最大节点的下标 41 | if(len >left && arr[left] > arr[i]) //如果左孩子大于根节点 将左孩子下标赋值给large 42 | large = left; 43 | else //否之,将根节点下标赋值给large 44 | large = i; 45 | 46 | if(len > right && arr[right] > arr[large]) 47 | large = right; //若右孩子节点大于根节点,把右孩子节点下标赋值给large 48 | 49 | if(large != i) //若最大节点的下标不等于根节点的下标时,交换其值 50 | { 51 | swap(large,i); 52 | sortHeap(large,len); 53 | } 54 | } 55 | //交换对应下标值 56 | private static void swap(int m, int n) { 57 | // TODO Auto-generated method stub 58 | int temp ; 59 | temp = arr[m]; 60 | arr[m] = arr[n]; 61 | arr[n] = temp; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /Java/Dataguru算法导论/sort/guibing.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import java.util.Arrays; 4 | 5 | /* 6 | *归并排序 7 | *时间复杂度 n*lg n 8 | */ 9 | 10 | public class guibing { 11 | 12 | public static void main(String[] args) { 13 | //定义数组 14 | int[] arr = {6,2,4,1,9,65,23,12}; 15 | 16 | //调用归并排序算法 17 | MergeSort(arr,0,7); 18 | 19 | System.out.println(Arrays.toString(arr)); 20 | } 21 | 22 | //归并排序算法 23 | private static void MergeSort(int[] arr, int low, int high) { 24 | // TODO Auto-generated method stub 25 | int mid = (low + high) /2; 26 | if(low0 && arr[j-1] > key){ 23 | arr[j] = arr[j-1]; 24 | j = j-1; 25 | } 26 | arr[j] = key; 27 | } 28 | //输出数组 29 | System.out.println("排序后的数组为:"); 30 | for(int i=0;ihigh) 10 | return; 11 | int temp; //保存基准值 12 | int left=0,right=0,empty=0; 13 | temp=a[low]; //将每次进来的最左边的值作为基准值 14 | left = low; //每次移动的指针初始值最左边的位置复制给left 15 | right = high; //每次移动的指针初始值最右边的位置复制给right 16 | while(left!=right){ //判断循环结束的条件 17 | while(a[right]>=temp && left 3 | 4 | 本部分分为多个项目,会涉及目前比较火的大数据的相关概念,比如说spark,hadoop,mahout,hbase,hive,openstack,storm等,目前主要学习hadoop和mahout,后续有时间和精力的话,会涉及更多,也欢迎大家即使补充相关代码,大家一起学习,一起进步
5 | 6 | 1:Hadoop 目录

7 | 8 | 2:Spark 目录

9 | 10 | 3:Mahout 目录

11 | 12 | 4:Hive 目录

13 | 14 | 5:Hbase 目录

15 | 16 | 6:Java 目录

17 | 18 | 7:cluster_conf 目录
19 | > 集群的配置文件备份 20 | 21 | Email:Thinkagmer_gyt@gmail.com
22 | QQ:1923361654
23 | WeChat:gyt13342445911
24 | 微博:Thinkgamer 25 | -------------------------------------------------------------------------------- /Spark/ChineseWordSplitCount/WordAnalyzer jar包链接.txt: -------------------------------------------------------------------------------- 1 | http://pan.baidu.com/s/1mihghmg -------------------------------------------------------------------------------- /Spark/ChineseWordSplitCount/blog href.txt: -------------------------------------------------------------------------------- 1 | http://blog.csdn.net/gamer_gyt/article/details/52194773 -------------------------------------------------------------------------------- /Spark/ChineseWordSplitCount/wordSplitCount.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Thinkgamer/Hadoop-Spark-Learning/46f24ae930fc6d426ad14989cb6dbad1e7966d8e/Spark/ChineseWordSplitCount/wordSplitCount.py -------------------------------------------------------------------------------- /Spark/PageRank/Jar包链接.txt: -------------------------------------------------------------------------------- 1 | http://pan.baidu.com/s/1miASxny -------------------------------------------------------------------------------- /Spark/README.md: -------------------------------------------------------------------------------- 1 | 本目录下主要是我对Spark操作代码托管地方,代码质量不一定高,但是尽我所能去写好每次的code,欢迎补充 2 | -------------------------------------------------------------------------------- /Spark/pairRDD/driver: -------------------------------------------------------------------------------- 1 | package week2 2 | 3 | import org.apache.spark.{SparkContext, SparkConf} 4 | import org.apache.spark.SparkContext._ 5 | 6 | object WordCount1 { 7 | def main(args: Array[String]) { 8 | if (args.length == 0) { 9 | System.err.println("Usage: WordCount1 ") 10 | System.exit(1) 11 | } 12 | 13 | val conf = new SparkConf().setAppName("WordCount1") 14 | val sc = new SparkContext(conf) 15 | 16 | .....//此处写你编写的Spark代码 17 | 18 | sc.stop() 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /Spark/pairRDD/example: -------------------------------------------------------------------------------- 1 | scala: 2 | #创建pair RDD 3 | var lines = sc.parallelize(List("i love you")) 4 | val pairs = lines.map(x=>(x,1)) 5 | pairs.foreach(println) 6 | 7 | ============================================================= 8 | #针对一个pair RDD的转化操作 9 | #rdd.reduceByKey(func):合并具有相同key的value值 10 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 11 | val result = rdd.reduceByKey((x,y)=>x+y) 12 | result.foreach(println) 13 | 14 | #rdd.groupByKey(func):对具有相同键的进行分组 15 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 16 | val result = rdd.groupByKey() 17 | result.foreach(println) 18 | 19 | #rdd.mapValues(func):对pairRDD中的每个值应用func 键不改变 20 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 21 | val result = rdd.mapValues(x=>x+1) 22 | result.foreach(println) 23 | 24 | #rdd.flatMapValues(func):类似于mapValues,返回的是迭代器函数 25 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 26 | val result = rdd.flatMapValues(x=>(x to 5)) 27 | result.foreach(println) 28 | 29 | #rdd.keys:返回一个仅包含键的RDD 30 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 31 | val result = rdd.keys 32 | result.foreach(println) 33 | 34 | #rdd.values:返回一个仅包含value的RDD 35 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 36 | val result = rdd.values 37 | result.foreach(println) 38 | 39 | #rdd.sortByKey():返回一个根据键排序的RDD 40 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 41 | val result =
rdd.sortByKey().collect() 42 | result 43 | 44 | =================================================================== 45 | #针对两个pair RDD的转化操作 46 | #rdd.subtractByKey( other ):删除掉RDD中与other RDD中键相同的元素 47 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 48 | val other = sc.parallelize(List((3,9))) 49 | val result = rdd.subtractByKey(other) 50 | result.foreach(println) 51 | 52 | #rdd.join( other ):对两个RDD进行内连接 53 | val result = rdd.join(other) 54 | result.foreach(println) 55 | 56 | #rdd.rightOuterJoin(other),对两个RDD进行连接操作,确保第一个RDD的键必须存在(右外连接) 57 | val result = rdd.rightOuterJoin(other) 58 | result.foreach(println) 59 | 60 | #rdd.leftOuterJoin(other):对两个RDD进行连接操作,确保第一个RDD的键必须存在(左外连接) 61 | val result = rdd.leftOuterJoin(other) 62 | result.foreach(println) 63 | 64 | #rdd.cogroup(other),将有两个rdd中拥有相同键的数据分组 65 | val result = rdd.cogroup(other) 66 | result.foreach(println) 67 | 68 | 69 | 70 | #聚合操作 71 | #使用reduceByKey()和mapValues()计算每个键对应的平均值 72 | val rdd = sc.parallelize(List(Tuple2("panda",0),Tuple2("pink",3),Tuple2("pirate",3),Tuple2("panda",1),Tuple2("pink",4))) 73 | val result = rdd.mapValues(x=>(x,1)).reduceByKey((x,y)=>(x._1+y._1,x._2+y._2)) 74 | result.foreach(println) 75 | 76 | #实现经典的分布式单词计数问题(使用flatMap() 来生成以单词为键,以数字1为值的pair RDD) 77 | val rdd = sc.parallelize(List("i am thinkgamer, i love cyan")) 78 | val words = rdd.flatMap(line => line.split(" ")) 79 | val result = words.map(x=>(x,1)).reduceByKey((x,y) => x+y) 80 | result.foreach(println) 81 | 82 | #实现经典的分布式单词计数问题(使用countByValue更快的实现单词计数) 83 | val rdd = sc.parallelize(List("i am thinkgamer, i love cyan")) 84 | val result = rdd.flatMap(x=>x.split(" ")).countByValue() 85 | result.foreach(println) 86 | 87 | #combineByKey()是最为常用的基于键进行聚合的函数,大多数基于键聚合的函数都是用它实现的,和aggregate()一样,combineByKey()可以让用户返回与输入数据类型不同的返回值 88 | 89 | val data = Seq(("a",3),("b",4),("c",5)) 90 | sc.parallelize(data).reduceByKey((x,y)=>x+y) //默认并行度 91 | sc.parallelize(data).reduceByKey((x,y)=>x+y,10) //自定义并行度 92 | 93 | #获取RDD的分区方式 94 | scala> val pairs = sc.parallelize(List((1,1),(2,2),(3,3))) 95 | pairs: org.apache.spark.rdd.RDD[(Int, Int)] = ParallelCollectionRDD[9] at parallelize at :27 96 | 97 | scala> pairs.partitioner 98 | res4: Option[org.apache.spark.Partitioner] = None 99 | 100 | scala> val partitioned = pairs.partitionBy(new org.apache.spark.HashPartitioner(2)) 101 | partitioned: org.apache.spark.rdd.RDD[(Int, Int)] = ShuffledRDD[10] at partitionBy at :29 102 | 103 | scala> partitioned.partitioner 104 | res5: Option[org.apache.spark.Partitioner] = Some(org.apache.spark.HashPartitioner@2) 105 | 106 | -------------------------------------------------------------------------------- /Spark/pairRDD/example~: -------------------------------------------------------------------------------- 1 | scala: 2 | #创建pair RDD 3 | var lines = sc.parallelize(List("i love you")) 4 | val pairs = lines.map(x=>(x,1)) 5 | pairs.foreach(println) 6 | 7 | ============================================================= 8 | #针对一个pair RDD的转化操作 9 | #rdd.reduceByKey(func):合并具有相同key的value值 10 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 11 | val rdd.reduceByKey((x,y)=>x+y) 12 | result.foreach(println) 13 | 14 | #rdd.groupByKey(func):对具有相同键的进行分组 15 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 16 | val result = rdd.groupByKey() 17 | result.foreach(println) 18 | 19 | #rdd.mapValues(func):对pairRDD中的每个值应用func 键不改变 20 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 21 | val result = rdd.mapValues(x=>x+1) 22 | result.foreach(println) 23 | 24 | #rdd.flatMapValues(func):类似于mapValues,返回的是迭代器函数 
25 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 26 | val result = rdd.flatMapValues(x=>(x to 5)) 27 | result.foreach(println) 28 | 29 | #rdd.keys:返回一个仅包含键的RDD 30 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 31 | val result = rdd.keys 32 | result.foreach(println) 33 | 34 | #rdd.values:返回一个仅包含value的RDD 35 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 36 | val result = rdd.values 37 | result.foreach(println) 38 | 39 | #rdd.sortByKey():返回一个根据键排序的RDD 40 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 41 | val result = rdd.sortByKey().collect() 42 | result 43 | 44 | =================================================================== 45 | #针对两个pair RDD的转化操作 46 | #rdd.subtractByKey( other ):删除掉RDD中与other RDD中键相同的元素 47 | val rdd = sc.parallelize(List((1,2),(3,4),(3,6))) 48 | val other = sc.parallelize(List((3,9))) 49 | val result = rdd.subtractByKey(other) 50 | result.foreach(println) 51 | 52 | #rdd.join( other ):对两个RDD进行内连接 53 | val result = rdd.join(other) 54 | result.foreach(println) 55 | 56 | #rdd.rightOuterJoin(other),对两个RDD进行连接操作,确保第一个RDD的键必须存在(右外连接) 57 | val result = rdd.rightOuterJoin(other) 58 | result.foreach(println) 59 | 60 | #rdd.leftOuterJoin(other):对两个RDD进行连接操作,确保第一个RDD的键必须存在(左外连接) 61 | val result = rdd.leftOuterJoin(other) 62 | result.foreach(println) 63 | 64 | #rdd.cogroup(other),将有两个rdd中拥有相同键的数据分组 65 | val result = rdd.cogroup(other) 66 | result.foreach(println) 67 | 68 | 69 | 70 | #聚合操作 71 | #使用reduceByKey()和mapValues()计算每个键对应的平均值 72 | val rdd = sc.parallelize(List(Tuple2("panda",0),Tuple2("pink",3),Tuple2("pirate",3),Tuple2("panda",1),Tuple2("pink",4))) 73 | val result = rdd.mapValues(x=>(x,1)).reduceByKey((x,y)=>(x._1+y._1,x._2+y._2)) 74 | result.foreach(println) 75 | 76 | #实现经典的分布式单词计数问题(使用flatMap() 来生成以单词为键,以数字1为值的pair RDD) 77 | val rdd = sc.parallelize(List("i am thinkgamer, i love cyan")) 78 | val words = rdd.flatMap(line => line.split(" ")) 79 | val result = words.map(x=>(x,1)).reduceByKey((x,y) => x+y) 80 | result.foreach(println) 81 | 82 | #实现经典的分布式单词计数问题(使用countByValue更快的实现单词计数) 83 | val rdd = sc.parallelize(List("i am thinkgamer, i love cyan")) 84 | val result = rdd.flatMap(x=>x.split(" ")).countByValue() 85 | result.foreach(println) 86 | 87 | #combineByKey()是最为常用的基于键进行聚合的函数,大多数基于键聚合的函数都是用它实现的,和aggregate()一样,combineByKey()可以让用户返回与输入数据类型不同的返回值 88 | 89 | val data = Seq(("a",3),("b",4),("c",5)) 90 | sc.parallelize(data).reduceByKey((x,y)=>x+y) //默认并行度 91 | sc.parallelize(data).reduceByKey((x,y)=>x+y,10) //自定义并行度 92 | 93 | #获取RDD的分区方式 94 | val pairs = sc.parallelize(List((1,1),(2,2),(3,3))) 95 | scala> pairs.partitioner 96 | res4: Option[org.apache.spark.Partitioner] = None 97 | scala> val partitioned = pairs.partitionBy(new org.apache.spark.HashPartitioner(2)) 98 | partitioned: org.apache.spark.rdd.RDD[(Int, Int)] = ShuffledRDD[10] at partitionBy at :29 99 | 100 | scala> partitioned.partitioner 101 | res5: Option[org.apache.spark.Partitioner] = Some(org.apache.spark.HashPartitioner@2) 102 | 103 | -------------------------------------------------------------------------------- /Spark/pairRDD/sample: -------------------------------------------------------------------------------- 1 | //parallelize演示 2 | val num=sc.parallelize(1 to 10) 3 | val doublenum = num.map(_*2) 4 | val threenum = doublenum.filter(_ % 3 == 0) 5 | threenum.collect 6 | threenum.toDebugString 7 | 8 | val num1=sc.parallelize(1 to 10,6) 9 | val doublenum1 = num1.map(_*2) 10 | val threenum1 = doublenum1.filter(_ % 3 == 0) 11 | threenum1.collect 12 | threenum1.toDebugString //查看依赖 13 | 
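// Note (assuming the spark-shell session above): cache() below is lazy -- it only marks threenum
// for persistence, and the data is actually materialized the next time an action (e.g. the
// collect() a few lines down) forces computation; threenum.getStorageLevel would then report the
// storage level in effect.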
14 | threenum.cache() 15 | val fournum = threenum.map(x=>x*x) 16 | fournum.collect //可以在web监控界面查看 17 | fournum.toDebugString 18 | threenum.unpersist() //可以删除cache,立即执行,不像cache函数,需要触发 19 | 20 | num.reduce (_ + _) 21 | num.take(5) 22 | num.first 23 | num.count 24 | num.take(5).foreach(println) 25 | 26 | //K-V演示 27 | val kv1=sc.parallelize(List(("A",1),("B",2),("C",3),("A",4),("B",5))) 28 | kv1.sortByKey().collect //注意sortByKey的小括号不能省 29 | kv1.groupByKey().collect 30 | kv1.reduceByKey(_+_).collect 31 | 32 | val kv2=sc.parallelize(List(("A",4),("A",4),("C",3),("A",4),("B",5))) 33 | kv2.distinct.collect 34 | kv1.union(kv2).collect 35 | 36 | val kv3=sc.parallelize(List(("A",10),("B",20),("D",30))) 37 | kv1.join(kv3).collect 38 | kv1.cogroup(kv3).collect 39 | 40 | val kv4=sc.parallelize(List(List(1,2),List(3,4))) 41 | kv4.flatMap(x=>x.map(_+1)).collect 42 | 43 | //文件读取演示 44 | val rdd1 = sc.textFile("hdfs://hadoop1:8000/dataguru/week2/directory/") 45 | rdd1.toDebugString 46 | val words=rdd1.flatMap(_.split(" ")) 47 | val wordscount=words.map(x=>(x,1)).reduceByKey(_+_) 48 | wordscount.collect 49 | wordscount.toDebugString 50 | 51 | val rdd2 = sc.textFile("hdfs://hadoop1:8000/dataguru/week2/directory/*.txt") 52 | rdd2.flatMap(_.split(" ")).map(x=>(x,1)).reduceByKey(_+_).collect 53 | 54 | //gzip压缩的文件 55 | val rdd3 = sc.textFile("hdfs://hadoop1:8000/dataguru/week2/test.txt.gz") 56 | rdd3.flatMap(_.split(" ")).map(x=>(x,1)).reduceByKey(_+_).collect 57 | 58 | //日志处理演示 59 | //http://download.labs.sogou.com/dl/q.html 完整版(2GB):gz格式 60 | //访问时间\t用户ID\t[查询词]\t该URL在返回结果中的排名\t用户点击的顺序号\t用户点击的URL 61 | //SogouQ1.txt、SogouQ2.txt、SogouQ3.txt分别是用head -n 或者tail -n 从SogouQ数据日志文件中截取n行 62 | 63 | //搜索结果排名第1,但是点击次序排在第2的数据有多少? 64 | val rdd1 = sc.textFile("hdfs://hadoop1:8000/dataguru/data/SogouQ1.txt") 65 | val rdd2=rdd1.map(_.split("\t")).filter(_.length==6) 66 | rdd2.count() 67 | val rdd3=rdd2.filter(_(3).toInt==1).filter(_(4).toInt==2) 68 | rdd3.count() 69 | rdd3.toDebugString 70 | 71 | //session查询次数排行榜 72 | val rdd4=rdd2.map(x=>(x(1),1)).reduceByKey(_+_).map(x=>(x._2,x._1)).sortByKey(false).map(x=>(x._2,x._1)) 73 | rdd4.toDebugString 74 | rdd4.saveAsTextFile("hdfs://hadoop1:8000/dataguru/week2/output1") 75 | 76 | 77 | //cache()演示 78 | //检查block命令:bin/hdfs fsck /dataguru/data/SogouQ3.txt -files -blocks -locations 79 | val rdd5 = sc.textFile("hdfs://hadoop1:8000/dataguru/data/SogouQ3.txt") 80 | rdd5.cache() 81 | rdd5.count() 82 | rdd5.count() //比较时间 83 | 84 | 85 | //join演示 86 | val format = new java.text.SimpleDateFormat("yyyy-MM-dd") 87 | case class Register (d: java.util.Date, uuid: String, cust_id: String, lat: Float,lng: Float) 88 | case class Click (d: java.util.Date, uuid: String, landing_page: Int) 89 | val reg = sc.textFile("hdfs://hadoop1:8000/dataguru/week2/join/reg.tsv").map(_.split("\t")).map(r => (r(1), Register(format.parse(r(0)), r(1), r(2), r(3).toFloat, r(4).toFloat))) 90 | val clk = sc.textFile("hdfs://hadoop1:8000/dataguru/week2/join/clk.tsv").map(_.split("\t")).map(c => (c(1), Click(format.parse(c(0)), c(1), c(2).trim.toInt))) 91 | reg.join(clk).take(2) 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /cluster_conf/README.md: -------------------------------------------------------------------------------- 1 | # 集群说明: 2 | 三个节点:分别为master1和slave1,slave2(由于电脑资源有限) 3 | 4 | master1 文件夹中为主节点的配置文件 5 | 6 | slave1 文件夹中为从节点的配置文件(如果有多个从节点,可复制) 7 | 8 | slave2 文件夹中为从节点2的配置文件 9 | 10 | # hosts文件修改说明 11 | 12 | 在主从节点的/etc/hosts中加入以下两行或者多行 13 
| 14 | ``` 15 | master1IP master1 16 | slave1IP slave1 17 | slave2IP slave2 18 | ``` 19 | 20 | # java options问题 21 | 22 | 关于deepin执行java -version显示 23 | ``` 24 | Picked up _JAVA_OPTIONS: -Dawt.useSystemAAFontSettings=gasp 25 | ``` 26 | 是正常的,对于强迫症的我,将其除去的办法是: 27 | 28 | 在 /etc/profile中加入 29 | ``` 30 | unset _JAVA_OPTIONS 31 | 32 | ``` 33 | 34 | --- 35 | 36 | # 遇到的问题及解决办法* 37 | ## 1: sign_and_send_pubkey: signing failed: agent refused operation 38 | 这是因为ssh 产生的秘钥没有加入到系统中,执行 ssh-add即可 39 | 40 | ## 2:Error: JAVA_HOME is not set and could not be found. 41 | 原因:我安装java环境的时候采用的deb安装的,所以系统已经有了$JAVA_HOME,但是在hadoop/etc/hadoop/hadoop-env.sh中不识别 42 | >export JAVA_HOME=${JAVA_HOME} 43 | 44 | 这里将${JAVA_HOME}换成你自己的java环境路径即可,可以通过 45 | >echo $JAVA_HOME 46 | 47 | 来查看 48 | 49 | ## 3:hadoop datanode 服务启动不成功 50 | 原因:datanode的clusterID 和 namenode的 clusterID 不匹配 51 | 解决办法: 52 | 根据 hdfs-site.xml 中的配置: 53 | 1、 打开 dfs.namenode.name.dir 配置对应目录下的 current 目录下的 VERSION 文件,拷贝clusterID; 54 | 2、 打开 dfs.datanode.data.dir 配置对应目录下的 current 目录下的 VERSION 文件,用拷贝的 clusterID 覆盖原有的clusterID; 55 | 3、 保存后重新启动 hadoop,datanode 进程就能正常启动了。 56 | 57 | ## 4:hive配置后启动错误 58 | 错误:Failed to get schema version 59 | 原因:在hive-site.xml配置javax.jdo.option.ConnectionURL value时,我把其中mysql所在的服务器的IP写成了用户名,这里改为localhost或者IP即可 60 | 61 | ## 5:从节点19888端口无法访问 62 | 63 | 执行:mr-jobhistory-daemon.sh start historyserver 64 | 65 | --- 66 | 67 | # 运行MR在远程集群的两种办法 68 | ## 1:提交jar包 69 | ``` 70 | hadoop jar xxx.jar classname inputpath outputpath 71 | ``` 72 | 73 | ## 2:代码中进行配置 74 | ``` 75 | Configuration conf = new Configuration(); 76 | conf.set("mapreduce.app-submission.cross-platform", "true"); 77 | conf.set("yarn.resourcemanager.address", "http://master1:8032"); 78 | conf.set("mapreduce.framework.name", "yarn"); 79 | Job job = Job.getInstance(conf, "wordcount"); 80 | ``` 81 | -------------------------------------------------------------------------------- /cluster_conf/master1/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | fs.default.name 22 | hdfs://master1:9000 23 | 24 | 25 | hadoop.tmp.dir 26 | file:/home/node1/bigdata/hadoop-2.7.3/tmp 27 | 28 | 29 | -------------------------------------------------------------------------------- /cluster_conf/master1/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | dfs.replication 22 | 2 23 | 24 | 25 | dfs.namenode.name.dir 26 | file:/home/node1/bigdata/hadoop-2.7.3/dfs/name 27 | 28 | 29 | dfs.datanode.data.dir 30 | file:/home/node1/bigdata/hadoop-2.7.3/dfs/data 31 | 32 | 33 | -------------------------------------------------------------------------------- /cluster_conf/master1/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | mapreduce.framework.name 22 | yarn 23 | 24 | 25 | mapreduce.jobhistory.address 26 | master1:10020 27 | 28 | 29 | mapreduce.jobhistory.webapp.address 30 | master1:19888 31 | 32 | 33 | -------------------------------------------------------------------------------- /cluster_conf/master1/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 19 | yarn.nodemanager.aux-services 20 | mapreduce_shuffle 21 | 22 | 23 | yarn.nodemanager.aux-services.mapreduce.shuffle.class 24 | org.apache.hadoop.mapred.ShuffleHandler 25 | 26 | 27 | yarn.resourcemanager.address 28 | master1:8032 29 | 
30 | 31 | yarn.resourcemanager.scheduler.address 32 | master1:8030 33 | 34 | 35 | yarn.resourcemanager.resource-tracker.address 36 | master1:8031 37 | 38 | 39 | yarn.resourcemanager.admin.address 40 | master1:8033 41 | 42 | 43 | yarn.resourcemanager.webapp.address 44 | master1:8088 45 | 46 | 47 | -------------------------------------------------------------------------------- /cluster_conf/slave1/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | fs.default.name 22 | hdfs://master1:9000 23 | 24 | 25 | hadoop.tmp.dir 26 | /home/node1/bigdata/hadoop-2.7.3/tmp 27 | 28 | 29 | -------------------------------------------------------------------------------- /cluster_conf/slave1/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | dfs.replication 22 | 2 23 | 24 | 25 | dfs.namenode.name.dir 26 | file:/home/node1/bigdata/hadoop-2.7.3/dfs/name 27 | 28 | 29 | dfs.datanode.data.dir 30 | file:/home/node1/bigdata/hadoop-2.7.3/dfs/data 31 | 32 | 33 | -------------------------------------------------------------------------------- /cluster_conf/slave1/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | mapreduce.framework.name 22 | yarn 23 | 24 | 25 | mapreduce.jobhistory.address 26 | master1:10020 27 | 28 | 29 | mapreduce.jobhistory.webapp.address 30 | master1:19888 31 | 32 | 33 | -------------------------------------------------------------------------------- /cluster_conf/slave1/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 19 | yarn.nodemanager.aux-services 20 | mapreduce_shuffle 21 | 22 | 23 | yarn.nodemanager.aux-services.mapreduce.shuffle.class 24 | org.apache.hadoop.mapred.ShuffleHandler 25 | 26 | 27 | yarn.resourcemanager.address 28 | master1:8032 29 | 30 | 31 | yarn.resourcemanager.scheduler.address 32 | master1:8030 33 | 34 | 35 | yarn.resourcemanager.resource-tracker.address 36 | master1:8031 37 | 38 | 39 | yarn.resourcemanager.admin.address 40 | master1:8033 41 | 42 | 43 | yarn.resourcemanager.webapp.address 44 | master1:8088 45 | 46 | 47 | -------------------------------------------------------------------------------- /cluster_conf/slave2/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | fs.default.name 22 | hdfs://master1:9000 23 | 24 | 25 | hadoop.tmp.dir 26 | /home/node1/bigdata/hadoop-2.7.3/tmp 27 | 28 | 29 | -------------------------------------------------------------------------------- /cluster_conf/slave2/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | dfs.replication 22 | 2 23 | 24 | 25 | dfs.namenode.name.dir 26 | file:/home/node1/bigdata/hadoop-2.7.3/dfs/name 27 | 28 | 29 | dfs.datanode.data.dir 30 | file:/home/node1/bigdata/hadoop-2.7.3/dfs/data 31 | 32 | 33 | -------------------------------------------------------------------------------- /cluster_conf/slave2/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | mapreduce.framework.name 22 | yarn 23 | 24 | 25 | mapreduce.jobhistory.address 26 | master1:10020 27 | 28 | 29 | mapreduce.jobhistory.webapp.address 30 | master1:19888 31 | 32 | 
33 | -------------------------------------------------------------------------------- /cluster_conf/slave2/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 19 | yarn.nodemanager.aux-services 20 | mapreduce_shuffle 21 | 22 | 23 | yarn.nodemanager.aux-services.mapreduce.shuffle.class 24 | org.apache.hadoop.mapred.ShuffleHandler 25 | 26 | 27 | yarn.resourcemanager.address 28 | master1:8032 29 | 30 | 31 | yarn.resourcemanager.scheduler.address 32 | master1:8030 33 | 34 | 35 | yarn.resourcemanager.resource-tracker.address 36 | master1:8031 37 | 38 | 39 | yarn.resourcemanager.admin.address 40 | master1:8033 41 | 42 | 43 | yarn.resourcemanager.webapp.address 44 | master1:8088 45 | 46 | 47 | --------------------------------------------------------------------------------