├── .classpath
├── .project
├── .settings
    ├── .jsdtscope
    ├── org.eclipse.jdt.core.prefs
    ├── org.eclipse.m2e.core.prefs
    ├── org.eclipse.wst.common.component
    ├── org.eclipse.wst.common.project.facet.core.xml
    ├── org.eclipse.wst.jsdt.ui.superType.container
    ├── org.eclipse.wst.jsdt.ui.superType.name
    └── org.eclipse.wst.validation.prefs
├── LICENSE
├── README.md
├── conf_data
    ├── HA高可用场景
    │   └── HA部署.txt
    ├── HIVE安装.txt
    ├── HTTP_20130313143750.dat
    ├── ZK搭建步骤.txt
    ├── a.txt
    ├── b.txt
    ├── c.txt
    ├── flowsort-data
    ├── hadoop-env.sh
    ├── hbase安装配置
    │   ├── backup-masters
    │   ├── core-site.xml
    │   ├── hbase shell.txt
    │   ├── hbase-env.sh
    │   ├── hbase-site.xml
    │   ├── hbase集群搭建.txt
    │   ├── hdfs-site.xml
    │   ├── regionservers
    │   └── 笔记.txt
    ├── hive HQL语法示例.txt
    ├── hive-default.xml.template
    ├── hive-site.xml
    ├── hive-udf.txt
    ├── hive.txt
    ├── hive安装-视频.txt
    ├── hive笔记.txt
    ├── kafka安装配置
    │   ├── kafka安装
    │   ├── kafka笔记.txt
    │   ├── server-1.properties
    │   ├── server-2.properties
    │   └── server-3.properties
    ├── order.txt
    ├── spark安装部署.txt
    ├── spark运行命令样例.txt
    ├── storm安装配置
    │   ├── storm-trainning-v1.0-zs.ppt
    │   ├── storm.yaml
    │   └── storm安装手册及笔记.txt
    ├── udf.txt
    ├── udt.test.txt
    ├── word-count.txt
    ├── zoo.cfg
    ├── zoo_sample.cfg
    └── 非HA场景
    │   ├── Hadoop搭建步骤-非HA场景.txt
    │   ├── core-site.xml
    │   ├── hdfs-site.xml
    │   ├── mapred-site.xml
    │   ├── masters
    │   ├── slaves
    │   └── yarn-site.xml
├── data_analyze.jpg
├── data_analyze.png
├── hadoop.jpg
├── hbase
    ├── .classpath
    ├── .gitignore
    ├── .project
    ├── .settings
    │   ├── org.eclipse.jdt.core.prefs
    │   └── org.eclipse.m2e.core.prefs
    ├── pom.xml
    └── src
    │   ├── main
    │       ├── java
    │       │   └── com
    │       │   │   └── xcompany
    │       │   │       └── xproject
    │       │   │           └── hbase
    │       │   │               ├── App.java
    │       │   │               └── HBaseTest.java
    │       └── resources
    │       │   └── log4j.properties
    │   └── test
    │       └── java
    │           └── com
    │               └── xcompany
    │                   └── xproject
    │                       └── hbase
    │                           └── AppTest.java
├── hdfs
    ├── .classpath
    ├── .gitignore
    ├── .project
    ├── .settings
    │   ├── org.eclipse.jdt.core.prefs
    │   └── org.eclipse.m2e.core.prefs
    ├── dependency-reduced-pom.xml
    ├── pom.xml
    └── src
    │   ├── main
    │       ├── java
    │       │   └── com
    │       │   │   └── xcompany
    │       │   │       └── xproject
    │       │   │           └── hdfs
    │       │   │               └── App.java
    │       └── resources
    │       │   └── log4j.properties
    │   └── test
    │       └── java
    │           └── com
    │               └── xcompany
    │                   └── xproject
    │                       └── hdfs
    │                           ├── AppTest.java
    │                           └── HDFSTest.java
├── hive
    ├── .classpath
    ├── .gitignore
    ├── .project
    ├── .settings
    │   ├── org.eclipse.jdt.core.prefs
    │   └── org.eclipse.m2e.core.prefs
    ├── dependency-reduced-pom.xml
    ├── pom.xml
    └── src
    │   ├── main
    │       ├── java
    │       │   └── com
    │       │   │   └── xcompany
    │       │   │       └── xproject
    │       │   │           └── hive
    │       │   │               └── Phone2Area.java
    │       └── resources
    │       │   └── log4j.properties
    │   └── test
    │       └── java
    │           └── com
    │               └── xcompany
    │                   └── xproject
    │                       └── hive
    │                           └── AppTest.java
├── kafka
    ├── .classpath
    ├── .gitignore
    ├── .project
    ├── .settings
    │   ├── org.eclipse.jdt.core.prefs
    │   └── org.eclipse.m2e.core.prefs
    ├── pom.xml
    └── src
    │   └── main
    │       ├── java
    │           └── com
    │           │   └── xcompany
    │           │       └── xproject
    │           │           └── kafka
    │           │               ├── TestConsumer.java
    │           │               └── TestProducer.java
    │       └── resources
    │           └── log4j.properties
├── mmdetection
    ├── 1-mmdection安装使用记录.txt
    ├── 2-mmdection预测新数据.txt
    ├── 3-mmdection模型指标测试.txt
    └── README
├── mr
    ├── .classpath
    ├── .gitignore
    ├── .project
    ├── .settings
    │   ├── org.eclipse.jdt.core.prefs
    │   └── org.eclipse.m2e.core.prefs
    ├── dependency-reduced-pom.xml
    ├── pom.xml
    └── src
    │   ├── main
    │       ├── java
    │       │   └── com
    │       │   │   └── xcompany
    │       │   │       └── xproject
    │       │   │           └── mr
    │       │   │               ├── App.java
    │       │   │               ├── flowpartition
    │       │   │                   ├── FlowBean.java
    │       │   │                   ├── FlowPartition.java
    │       │   │                   ├── FlowPartitionJob.java
    │       │   │                   ├── FlowPartitionMapper.java
    │       │   │                   └── FlowPartitionReducer.java
    │       │   │               ├── flowsort
    │       │   │                   ├── FlowBean.java
    │       │   │                   ├── FlowSortJob.java
    │       │   │                   ├── FlowSortMapper.java
    │       │   │                   └── FlowSortReducer.java
    │       │   │               ├── flowsum
    │       │   │                   ├── FlowBean.java
    │       │   │                   ├── FlowSumJob.java
    │       │   │                   ├── FlowSumMapper.java
    │       │   │                   └── FlowSumReducer.java
    │       │   │               ├── invertedindex
    │       │   │                   ├── StepOneJob.java
    │       │   │                   ├── StepOneMapper.java
    │       │   │                   ├── StepOneReducer.java
    │       │   │                   ├── StepTwoJob.java
    │       │   │                   ├── StepTwoMapper.java
    │       │   │                   └── StepTwoReducer.java
    │       │   │               └── wordcount
    │       │   │                   ├── WordCountJob.java
    │       │   │                   ├── WordCountMapper.java
    │       │   │                   └── WordCountReducer.java
    │       └── resources
    │       │   └── log4j.properties
    │   └── test
    │       └── java
    │           └── com
    │               └── xcompany
    │                   └── xproject
    │                       └── mr
    │                           └── AppTest.java
├── pom.xml
├── rpc
    ├── .classpath
    ├── .gitignore
    ├── .project
    ├── .settings
    │   ├── .jsdtscope
    │   ├── org.eclipse.jdt.core.prefs
    │   ├── org.eclipse.m2e.core.prefs
    │   ├── org.eclipse.wst.common.component
    │   ├── org.eclipse.wst.common.project.facet.core.prefs.xml
    │   ├── org.eclipse.wst.common.project.facet.core.xml
    │   ├── org.eclipse.wst.jsdt.ui.superType.container
    │   ├── org.eclipse.wst.jsdt.ui.superType.name
    │   └── org.eclipse.wst.validation.prefs
    ├── dependency-reduced-pom.xml
    ├── log
    │   └── hdfs.log
    ├── logs
    │   ├── hdfs.log
    │   └── hdfs.log.2017-11-06
    ├── pom.xml
    └── src
    │   ├── main
    │       ├── java
    │       │   └── com
    │       │   │   └── xcompany
    │       │   │       └── xproject
    │       │   │           └── rpc
    │       │   │               ├── App.java
    │       │   │               ├── HelloClient.java
    │       │   │               ├── HelloProtocol.java
    │       │   │               └── HelloServer.java
    │       └── resources
    │       │   └── log4j.properties
    │   └── test
    │       └── java
    │           └── com
    │               └── xcompany
    │                   └── xproject
    │                       └── rpc
    │                           └── AppTest.java
├── scala
    ├── .cache
    ├── .cache-main
    ├── .classpath
    ├── .gitignore
    ├── .project
    ├── .settings
    │   ├── org.eclipse.jdt.core.prefs
    │   ├── org.eclipse.m2e.core.prefs
    │   └── org.scala-ide.sdt.core.prefs
    ├── pom.xml
    └── src
    │   └── main
    │       └── scala
    │           └── com
    │               └── xcompany
    │                   └── xproject
    │                       └── scala
    │                           └── App.scala
├── spark-streaming.zip
├── spark
    ├── .cache-main
    ├── .cache-tests
    ├── .classpath
    ├── .gitignore
    ├── .project
    ├── .settings
    │   ├── org.eclipse.jdt.core.prefs
    │   └── org.eclipse.m2e.core.prefs
    ├── checkpoint
    │   ├── .checkpoint-1514427870000.crc
    │   ├── .checkpoint-1514427880000.crc
    │   ├── .checkpoint-1514427890000.crc
    │   ├── .checkpoint-1514427900000.crc
    │   ├── .checkpoint-1514427910000.crc
    │   ├── .checkpoint-1514427920000.crc
    │   ├── .checkpoint-1514427930000.crc
    │   ├── .checkpoint-1514427940000.crc
    │   ├── .checkpoint-1514427950000.crc
    │   ├── .checkpoint-1514427960000.crc
    │   ├── checkpoint-1514427870000
    │   ├── checkpoint-1514427880000
    │   ├── checkpoint-1514427890000
    │   ├── checkpoint-1514427900000
    │   ├── checkpoint-1514427910000
    │   ├── checkpoint-1514427920000
    │   ├── checkpoint-1514427930000
    │   ├── checkpoint-1514427940000
    │   ├── checkpoint-1514427950000
    │   ├── checkpoint-1514427960000
    │   └── receivedBlockMetadata
    │   │   ├── log-1514427870017-1514427930017
    │   │   └── log-1514427932107-1514427992107
    ├── pom.xml
    └── src
    │   ├── main
    │       └── scala
    │       │   └── com
    │       │       └── xcompany
    │       │           └── xproject
    │       │               └── spark
    │       │                   ├── App.scala
    │       │                   ├── WordCount.scala
    │       │                   └── streaming
    │       │                       ├── BroadcastWrapper.scala
    │       │                       ├── KafkaWordCount.scala
    │       │                       ├── NetworkWordCount.scala
    │       │                       ├── RedisClient.scala
    │       │                       └── WaitForReady.scala
    │   └── test
    │       └── scala
    │           └── com
    │               └── xcompany
    │                   └── xproject
    │                       └── spark
    │                           ├── AppTest.scala
    │                           └── MySpec.scala
├── spark_data
    ├── a.txt
    ├── b.txt
    └── c.txt
├── sparkstreaming.zip
├── storm-kafka.zip
├── storm.zip
├── tensorflow
    ├── 01-TemsorFlow的模块与API.png
    ├── 01-TensorFlow基本概念与HelloWorld.txt
    ├── 02-TensorFlow架构.png
    ├── 02-TensorFlow核心基础知识.txt
    ├── 03-MNIST手写体数据集训练.txt
    ├── 1-机器学习基础.ipynb
    ├── 10-模型定义与查看.ipynb
    ├── 11-AML本地交互式训练.ipynb
    ├── 12-AML远程单节点训练.ipynb
    ├── 13-AML远程分布式训练.ipynb
    ├── 2-MNIST手写体数据集.ipynb
    ├── 3-工作区workspace访问.ipynb
    ├── 4-数据存储DataStore访问.ipynb
    ├── 5-注册数据集Dataset.ipynb
    ├── 6-标记数据集Dataset.ipynb
    ├── 7-加载数据集Dataset.ipynb
    ├── 8-规范化数据集Dataset.ipynb
    ├── 9-统计分析数据集Dataset.ipynb
    ├── MLOps流水线参考.png
    ├── README
    ├── env
    │   ├── README
    │   └── aml-demo-conda-dependencies.yaml
    ├── requests_futures使用参考.txt
    ├── src
    │   ├── README
    │   ├── job-dist.py
    │   ├── job.py
    │   ├── train-dist.py
    │   └── train.py
    └── 性能测试.png
└── 我的书签.rar


/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
 4 | 		<attributes>
 5 | 			<attribute name="maven.pomderived" value="true"/>
 6 | 			<attribute name="org.eclipse.jst.component.dependency" value="/WEB-INF/lib"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
10 | 		<attributes>
11 | 			<attribute name="owner.project.facets" value="java"/>
12 | 		</attributes>
13 | 	</classpathentry>
14 | 	<classpathentry kind="output" path="target/classes"/>
15 | </classpath>
16 | 


--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>hadoop</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.wst.jsdt.core.javascriptValidator</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.jdt.core.javabuilder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 		<buildCommand>
19 | 			<name>org.eclipse.wst.common.project.facet.core.builder</name>
20 | 			<arguments>
21 | 			</arguments>
22 | 		</buildCommand>
23 | 		<buildCommand>
24 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
25 | 			<arguments>
26 | 			</arguments>
27 | 		</buildCommand>
28 | 		<buildCommand>
29 | 			<name>org.eclipse.wst.validation.validationbuilder</name>
30 | 			<arguments>
31 | 			</arguments>
32 | 		</buildCommand>
33 | 	</buildSpec>
34 | 	<natures>
35 | 		<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
36 | 		<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
37 | 		<nature>org.eclipse.jdt.core.javanature</nature>
38 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
39 | 		<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
40 | 		<nature>org.eclipse.wst.jsdt.core.jsNature</nature>
41 | 	</natures>
42 | </projectDescription>
43 | 


--------------------------------------------------------------------------------
/.settings/.jsdtscope:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" path="src/main/webapp"/>
 4 | 	<classpathentry kind="src" path="target/m2e-wtp/web-resources"/>
 5 | 	<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.JRE_CONTAINER"/>
 6 | 	<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.WebProject">
 7 | 		<attributes>
 8 | 			<attribute name="hide" value="true"/>
 9 | 		</attributes>
10 | 	</classpathentry>
11 | 	<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.baseBrowserLibrary"/>
12 | 	<classpathentry kind="output" path=""/>
13 | </classpath>
14 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
 1 | eclipse.preferences.version=1
 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
 6 | org.eclipse.jdt.core.compiler.compliance=1.8
 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
13 | org.eclipse.jdt.core.compiler.source=1.8
14 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.common.component:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?><project-modules id="moduleCoreId" project-version="1.5.0">
 2 |     <wb-module deploy-name="hadoop">
 3 |         <wb-resource deploy-path="/" source-path="/target/m2e-wtp/web-resources"/>
 4 |         <wb-resource deploy-path="/" source-path="/src/main/webapp" tag="defaultRootSource"/>
 5 |         <wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/java"/>
 6 |         <wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/resources"/>
 7 |         <property name="context-root" value="hadoop"/>
 8 |         <property name="java-output-path" value="/hadoop/target/classes"/>
 9 |     </wb-module>
10 | </project-modules>
11 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.common.project.facet.core.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <faceted-project>
3 |   <fixed facet="wst.jsdt.web"/>
4 |   <installed facet="jst.web" version="2.3"/>
5 |   <installed facet="wst.jsdt.web" version="1.0"/>
6 |   <installed facet="java" version="1.8"/>
7 | </faceted-project>
8 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.jsdt.ui.superType.container:
--------------------------------------------------------------------------------
1 | org.eclipse.wst.jsdt.launching.baseBrowserLibrary


--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.jsdt.ui.superType.name:
--------------------------------------------------------------------------------
1 | Window


--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.validation.prefs:
--------------------------------------------------------------------------------
1 | disabled=06target
2 | eclipse.preferences.version=1
3 | 


--------------------------------------------------------------------------------
/conf_data/HA高可用场景/HA部署.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/HA高可用场景/HA部署.txt


--------------------------------------------------------------------------------
/conf_data/HTTP_20130313143750.dat:
--------------------------------------------------------------------------------
 1 | ﻿1363157985066 	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com		24	27	2481	24681	200
 2 | 1363157995052 	13826544101	5C-0E-8B-C7-F1-E0:CMCC	120.197.40.4			4	0	264	0	200
 3 | 1363157991076 	13926435656	20-10-7A-28-CC-0A:CMCC	120.196.100.99			2	4	132	1512	200
 4 | 1363154400022 	13926251106	5C-0E-8B-8B-B1-50:CMCC	120.197.40.4			4	0	240	0	200
 5 | 1363157993044 	18211575961	94-71-AC-CD-E6-18:CMCC-EASY	120.196.100.99	iface.qiyi.com	视频网站	15	12	1527	2106	200
 6 | 1363157995074 	84138413	5C-0E-8B-8C-E8-20:7DaysInn	120.197.40.4	122.72.52.12		20	16	4116	1432	200
 7 | 1363157993055 	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			18	15	1116	954	200
 8 | 1363157995033 	15920133257	5C-0E-8B-C7-BA-20:CMCC	120.197.40.4	sug.so.xxx.cn	信息安全	20	20	3156	2936	200
 9 | 1363157983019 	13719199419	68-A1-B7-03-07-B1:CMCC-EASY	120.196.100.82			4	0	240	0	200
10 | 1363157984041 	13660577991	5C-0E-8B-92-5C-20:CMCC-EASY	120.197.40.4	s19.cnzz.com	站点统计	24	9	6960	690	200
11 | 1363157973098 	15013685858	5C-0E-8B-C7-F7-90:CMCC	120.197.40.4	rank.ie.sogou.com	搜索引擎	28	27	3659	3538	200
12 | 1363157986029 	15989002119	E8-99-C4-4E-93-E0:CMCC-EASY	120.196.100.99	www.umeng.com	站点统计	3	3	1938	180	200
13 | 1363157992093 	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			15	9	918	4938	200
14 | 1363157986041 	13480253104	5C-0E-8B-C7-FC-80:CMCC-EASY	120.197.40.4			3	3	180	180	200
15 | 1363157984040 	13602846565	5C-0E-8B-8B-B6-00:CMCC	120.197.40.4	2052.flash2-http.qq.com	综合门户	15	12	1938	2910	200
16 | 1363157995093 	13922314466	00-FD-07-A2-EC-BA:CMCC	120.196.100.82	img.qfc.cn		12	12	3008	3720	200
17 | 1363157982040 	13502468823	5C-0A-5B-6A-0B-D4:CMCC-EASY	120.196.100.99	y0.ifengimg.com	综合门户	57	102	7335	110349	200
18 | 1363157986072 	18320173382	84-25-DB-4F-10-1A:CMCC-EASY	120.196.100.99	input.shouji.sogou.com	搜索引擎	21	18	9531	2412	200
19 | 1363157990043 	13925057413	00-1F-64-E1-E6-9A:CMCC	120.196.100.55	t3.baidu.com	搜索引擎	69	63	11058	48243	200
20 | 1363157988072 	13760778710	00-FD-07-A4-7B-08:CMCC	120.196.100.82			2	2	120	120	200
21 | 1363157985079 	13823070001	20-7C-8F-70-68-1F:CMCC	120.196.100.99			6	3	360	180	200
22 | 1363157985069 	13600217502	00-1F-64-E2-E8-B1:CMCC	120.196.100.55			18	138	1080	186852	200
23 | 


--------------------------------------------------------------------------------
/conf_data/ZK搭建步骤.txt:
--------------------------------------------------------------------------------
 1 | 版本选择: http://blog.csdn.net/anningzhu/article/details/60468723
 2 | http://hbase.apache.org/book.html#zookeeper.requirements
 3 | 选择：zookeeper-3.4.9.tar.gz
 4 | 
 5 | /home/xxproject/lib
 6 | tar -xzvf zookeeper-3.4.9.tar.gz
 7 | ln -sf zookeeper-3.4.9 zookeeper
 8 | mkdir -p /home/xxproject/data/zookeeper
 9 | zoo.cfg 配置拷贝到 /home/xxproject/lib/zookeeper-3.4.9/conf 下面
10 | 
11 | echo '
12 | # !!!No Modification, This Section is Auto Generated by ZooKeeper
13 | export ZK_HOME=/home/xxproject/lib/zookeeper
14 | export PATH=${PATH}:${ZK_HOME}/bin
15 | ' >> ~/.bash_profile
16 | source ~/.bash_profile
17 | 
18 | 三台机器分别执行
19 | echo 1 > /home/xxproject/data/zookeeper/myid
20 | echo 2 > /home/xxproject/data/zookeeper/myid
21 | echo 3 > /home/xxproject/data/zookeeper/myid
22 | 
23 | # 启动ZK
24 | #./zookeeper/bin/zkServer.sh start
25 | #./zookeeper/bin/zkServer.sh status
26 | # bin/zkCli.sh -server 127.0.0.1:2181
27 | zkServer.sh start
28 | zkServer.sh status
29 | # jps QuorumPeerMain
30 | 


--------------------------------------------------------------------------------
/conf_data/a.txt:
--------------------------------------------------------------------------------
1 | hello tom
2 | hello jerry
3 | hello tom
4 | 


--------------------------------------------------------------------------------
/conf_data/b.txt:
--------------------------------------------------------------------------------
1 | hello jerry
2 | hello jerry
3 | tom jerry
4 | 


--------------------------------------------------------------------------------
/conf_data/c.txt:
--------------------------------------------------------------------------------
1 | hello jerry
2 | hello tom
3 | 


--------------------------------------------------------------------------------
/conf_data/flowsort-data:
--------------------------------------------------------------------------------
 1 | 13480253104	180	180	360
 2 | 13502468823	7335	110349	117684
 3 | 13560439658	2034	5892	7926
 4 | 13600217502	1080	186852	187932
 5 | 13602846565	1938	2910	4848
 6 | 13660577991	6960	690	7650
 7 | 13719199419	240	0	240
 8 | 13726230503	2481	24681	27162
 9 | 13760778710	120	120	240
10 | 13823070001	360	180	540
11 | 13826544101	264	0	264
12 | 13922314466	3008	3720	6728
13 | 13925057413	11058	48243	59301
14 | 13926251106	240	0	240
15 | 13926435656	132	1512	1644
16 | 15013685858	3659	3538	7197
17 | 15920133257	3156	2936	6092
18 | 15989002119	1938	180	2118
19 | 18211575961	1527	2106	3633
20 | 18320173382	9531	2412	11943
21 | 84138413	4116	1432	5548
22 | 


--------------------------------------------------------------------------------
/conf_data/hadoop-env.sh:
--------------------------------------------------------------------------------
  1 | # Licensed to the Apache Software Foundation (ASF) under one
  2 | # or more contributor license agreements.  See the NOTICE file
  3 | # distributed with this work for additional information
  4 | # regarding copyright ownership.  The ASF licenses this file
  5 | # to you under the Apache License, Version 2.0 (the
  6 | # "License"); you may not use this file except in compliance
  7 | # with the License.  You may obtain a copy of the License at
  8 | #
  9 | #     http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | # Set Hadoop-specific environment variables here.
 18 | 
 19 | # The only required environment variable is JAVA_HOME.  All others are
 20 | # optional.  When running a distributed configuration it is best to
 21 | # set JAVA_HOME in this file, so that it is correctly defined on
 22 | # remote nodes.
 23 | 
 24 | # The java implementation to use.
 25 | JAVA_HOME=/home/xxproject/lib/jdk
 26 | export JAVA_HOME=${JAVA_HOME}
 27 | 
 28 | # The jsvc implementation to use. Jsvc is required to run secure datanodes
 29 | # that bind to privileged ports to provide authentication of data transfer
 30 | # protocol.  Jsvc is not required if SASL is configured for authentication of
 31 | # data transfer protocol using non-privileged ports.
 32 | #export JSVC_HOME=${JSVC_HOME}
 33 | 
 34 | export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
 35 | 
 36 | # Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
 37 | for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
 38 |   if [ "$HADOOP_CLASSPATH" ]; then
 39 |     export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
 40 |   else
 41 |     export HADOOP_CLASSPATH=$f
 42 |   fi
 43 | done
 44 | 
 45 | # The maximum amount of heap to use, in MB. Default is 1000.
 46 | #export HADOOP_HEAPSIZE=
 47 | #export HADOOP_NAMENODE_INIT_HEAPSIZE=""
 48 | 
 49 | # Extra Java runtime options.  Empty by default.
 50 | export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
 51 | 
 52 | # Command specific options appended to HADOOP_OPTS when specified
 53 | export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
 54 | export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
 55 | 
 56 | export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
 57 | 
 58 | export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
 59 | export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
 60 | 
 61 | # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
 62 | export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
 63 | #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
 64 | 
 65 | # On secure datanodes, user to run the datanode as after dropping privileges.
 66 | # This **MUST** be uncommented to enable secure HDFS if using privileged ports
 67 | # to provide authentication of data transfer protocol.  This **MUST NOT** be
 68 | # defined if SASL is configured for authentication of data transfer protocol
 69 | # using non-privileged ports.
 70 | export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
 71 | 
 72 | # Where log files are stored.  $HADOOP_HOME/logs by default.
 73 | #export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
 74 | 
 75 | # Where log files are stored in the secure data environment.
 76 | export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
 77 | 
 78 | ###
 79 | # HDFS Mover specific parameters
 80 | ###
 81 | # Specify the JVM options to be used when starting the HDFS Mover.
 82 | # These options will be appended to the options specified as HADOOP_OPTS
 83 | # and therefore may override any similar flags set in HADOOP_OPTS
 84 | #
 85 | # export HADOOP_MOVER_OPTS=""
 86 | 
 87 | ###
 88 | # Advanced Users Only!
 89 | ###
 90 | 
 91 | # The directory where pid files are stored. /tmp by default.
 92 | # NOTE: this should be set to a directory that can only be written to by 
 93 | #       the user that will run the hadoop daemons.  Otherwise there is the
 94 | #       potential for a symlink attack.
 95 | export HADOOP_PID_DIR=${HADOOP_PID_DIR}
 96 | export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
 97 | 
 98 | # A string representing this instance of hadoop. $USER by default.
 99 | export HADOOP_IDENT_STRING=$USER
100 | 


--------------------------------------------------------------------------------
/conf_data/hbase安装配置/backup-masters:
--------------------------------------------------------------------------------
1 | node-03
2 | 


--------------------------------------------------------------------------------
/conf_data/hbase安装配置/core-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed under the Apache License, Version 2.0 (the "License");
 5 |   you may not use this file except in compliance with the License.
 6 |   You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License. See accompanying LICENSE file.
15 | -->
16 | 
17 | <!-- Put site-specific property overrides in this file. -->
18 | 
19 | <configuration>
20 |     <!-- 指定namenode的主机名和端口号，此处的端口号需要和hdfs-site.xml的dfs.namenode.rpc-address配置的端口相同 -->
21 |     <property>
22 |         <name>fs.defaultFS</name>
23 |         <value>hdfs://node-01:9000</value>
24 |         <description>The name of the default file system.</description>
25 |     </property>
26 |     <!-- 配置hadoop的临时目录，我们不需要把这个目录创建出来, 这里的路径默认是NameNode,DataNode,JournalNode等存放数据的公共目录-->  
27 |     <property>  
28 |         <name>hadoop.tmp.dir</name>  
29 |         <value>/home/xxproject/data/hadoop/tmp</value>  
30 |         <description>A base for other temporary directories.</description>    
31 |     </property>
32 | </configuration>
33 | 


--------------------------------------------------------------------------------
/conf_data/hbase安装配置/hbase shell.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hbase安装配置/hbase shell.txt


--------------------------------------------------------------------------------
/conf_data/hbase安装配置/hbase-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 | /**
 5 |  *
 6 |  * Licensed to the Apache Software Foundation (ASF) under one
 7 |  * or more contributor license agreements.  See the NOTICE file
 8 |  * distributed with this work for additional information
 9 |  * regarding copyright ownership.  The ASF licenses this file
10 |  * to you under the Apache License, Version 2.0 (the
11 |  * "License"); you may not use this file except in compliance
12 |  * with the License.  You may obtain a copy of the License at
13 |  *
14 |  *     http://www.apache.org/licenses/LICENSE-2.0
15 |  *
16 |  * Unless required by applicable law or agreed to in writing, software
17 |  * distributed under the License is distributed on an "AS IS" BASIS,
18 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 |  * See the License for the specific language governing permissions and
20 |  * limitations under the License.
21 |  */
22 | -->
23 | <configuration>
24 |     <!-- 指定hbase在HDFS上存储的路径 -->
25 |     <property>
26 |         <name>hbase.rootdir</name>
27 |         <value>hdfs://node-01:9000/hbase</value>
28 |     </property>
29 |     <!-- 指定hbase是分布式的 -->
30 |     <property>
31 |         <name>hbase.cluster.distributed</name>
32 |         <value>true</value>
33 |     </property>
34 |     <!-- 指定zk的地址，多个用“,”分割 -->
35 |     <property>
36 |         <name>hbase.zookeeper.quorum</name>
37 |         <value>node-01:2181,node-02:2181,node-03:2181</value>
38 |     </property>
39 | </configuration>
40 | 


--------------------------------------------------------------------------------
/conf_data/hbase安装配置/hbase集群搭建.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hbase安装配置/hbase集群搭建.txt


--------------------------------------------------------------------------------
/conf_data/hbase安装配置/hdfs-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed under the Apache License, Version 2.0 (the "License");
 5 |   you may not use this file except in compliance with the License.
 6 |   You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License. See accompanying LICENSE file.
15 | -->
16 | 
17 | <!-- Put site-specific property overrides in this file. -->
18 | 
19 | <configuration>
20 |     <!-- 这里是secondary namenode的主机名、端口，可以通过HTTP/HTTPS访问 -->
21 |     <property>  
22 |         <name>dfs.namenode.secondary.http-address</name>  
23 |         <value>node-02:50090</value>  
24 |         <description>The secondary namenode http server address and port. </description>  
25 |     </property>
26 |     <property>  
27 |         <name>dfs.namenode.secondary.https-address</name>
28 |         <value>node-02:50091</value>
29 |         <description>The secondary namenode HTTPS server address and port. </description> 
30 |     </property> 
31 |     <!-- 配置web UI 访问hdfs系统的地址-->  
32 |     <property>  
33 |         <name>dfs.namenode.http-address</name>  
34 |         <value>node-01:50070</value>  
35 |         <description>The address and the base port where the dfs namenode web ui will listen on. </description>  
36 |     </property>   
37 |     <!-- 设置HDFS的副本数 -->  
38 |     <property>  
39 |         <name>dfs.replication</name>  
40 |         <value>3</value>  
41 |     </property>
42 | </configuration>
43 | 


--------------------------------------------------------------------------------
/conf_data/hbase安装配置/regionservers:
--------------------------------------------------------------------------------
1 | node-02
2 | node-03
3 | node-04
4 | 


--------------------------------------------------------------------------------
/conf_data/hbase安装配置/笔记.txt:
--------------------------------------------------------------------------------
 1 | cd /home/xxproject/lib
 2 | tar -xzvf hbase-1.2.6-bin.tar.gz
 3 | ln -sf hbase-1.2.6 hbase
 4 | 
 5 | hbase-env.sh 中添加：
 6 | export JAVA_HOME=/home/xxproject/lib/jdk
 7 | export HBASE_MANAGES_ZK=false
 8 | 
 9 | hdfs的 core-site.xml、hdfs-site.xml 拷贝到 hbase的配置文件目录---------因为hadoop不是ns的方式，所以应该是不需要的
10 | 
11 | 修改hbase-site.xml配置如下：
12 |     <!-- 指定hbase在HDFS上存储的路径 -->
13 |     <property>
14 |         <name>hbase.rootdir</name>
15 |         <value>hdfs://node-01:9000/hbase</value>
16 |     </property>
17 |     <!-- 指定hbase是分布式的 -->
18 |     <property>
19 |         <name>hbase.cluster.distributed</name>
20 |         <value>true</value>
21 |     </property>
22 |     <!-- 指定zk的地址，多个用“,”分割 -->
23 |     <property>
24 |         <name>hbase.zookeeper.quorum</name>
25 |         <value>node-01:2181,node-02:2181,node-03:2181</value>
26 |     </property>
27 | 
28 | echo '
29 | # !!!No Modification, This Section is Auto Generated by ZooKeeper
30 | export HBASE_HOME=/home/xxproject/lib/hbase
31 | export PATH=${PATH}:${HBASE_HOME}/bin
32 | ' >> ~/.bash_profile
33 | source ~/.bash_profile
34 | 
35 | hmaster--regionserver--hmaster-backup
36 | HMaster执行:
37 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start master/hbase-daemon.sh start master
38 | regionserver节点上都执行:
39 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start regionserver
40 | HMaster-BackUp也执行:
41 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start master
42 | 
43 | 该步骤暂时屏蔽--有问题：
44 | ```5.启动所有的hbase
45 | 	分别启动zk
46 | 		./zkServer.sh start
47 | 	启动hbase集群
48 | 		start-dfs.sh
49 | 	启动hbase，在主节点上运行：
50 | 		start-hbase.sh
51 | 6.通过浏览器访问hbase管理页面
52 | 	192.168.1.201:60010
53 | 7.为保证集群的可靠性，要启动多个HMaster
54 | 	hbase-daemon.sh start master
55 | ```
56 | 
57 | 测试一下：
58 | http://10.20.0.12:16010/master-status
59 | http://10.20.0.12:16030/rs-status
60 | 可以杀死master节点看看备节点会不会升主
61 | 
62 | hbase shell 试用一下：
63 | create 'mygirls', {NAME => 'base_info', VERSIONS => 3}, {NAME => 'extra_info'}
64 | describe 'mygirls'
65 | put 'mygirls', '0001', 'base_info:name', 'fengjie'
66 | put 'mygirls', '0001', 'base_info:age', '28'
67 | put 'mygirls', '0001', 'base_info:gender', 'feamle'
68 | put 'mygirls', '0001', 'extra_info:boyfriend', 'xiaoming'
69 | get 'mygirls', '0001'
70 | get 'mygirls', '0001', 'base_info'
71 | get 'mygirls', '0001', 'extra_info:boyfriend'
72 | 
73 | put 'mygirls', '0001', 'base_info:name', 'fengbaobao'
74 | get 'mygirls', '0001', {COLUMN => 'base_info', VERSIONS => 3}
75 | 
76 | get 只能一次一行数据， 返回多行用scan
77 | scan 'mygirls', {COLUMNS => ['base_info'], LIMIT => 10, STARTROW => '0001', VERSIONS => 10}
78 | 
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------
/conf_data/hive HQL语法示例.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive HQL语法示例.txt


--------------------------------------------------------------------------------
/conf_data/hive-udf.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive-udf.txt


--------------------------------------------------------------------------------
/conf_data/hive.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive.txt


--------------------------------------------------------------------------------
/conf_data/hive安装-视频.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive安装-视频.txt


--------------------------------------------------------------------------------
/conf_data/hive笔记.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive笔记.txt


--------------------------------------------------------------------------------
/conf_data/kafka安装配置/kafka安装:
--------------------------------------------------------------------------------
 1 | ﻿kafka笔记 
 2 | 
 3 | 
 4 | 集群安装
 5 | 1、解压
 6 | 2、修改server.properties
 7 | broker.id=1
 8 | zookeeper.connect=weekend05:2181,weekend06:2181,weekend07:2181
 9 | 
10 | 3、将zookeeper集群启动
11 | 
12 | 4、在每一台节点上启动broker
13 | bin/kafka-server-start.sh config/server.properties
14 | 
15 | 5、在kafka集群中创建一个topic
16 | bin/kafka-topics.sh --create --zookeeper weekend05:2181 --replication-factor 3 --partitions 1 --topic order
17 | 
18 | 6、用一个producer向某一个topic中写入消息
19 | bin/kafka-console-producer.sh --broker-list weekend:9092 --topic order
20 | 
21 | 7、用一个comsumer从某一个topic中读取信息
22 | bin/kafka-console-consumer.sh --zookeeper weekend05:2181 --from-beginning --topic order
23 | 
24 | 8、查看一个topic的分区及副本状态信息
25 | bin/kafka-topics.sh --describe --zookeeper weekend05:2181 --topic order


--------------------------------------------------------------------------------
/conf_data/kafka安装配置/kafka笔记.txt:
--------------------------------------------------------------------------------
 1 | ﻿kafka笔记 
 2 | 1/kafka是一个分布式的消息缓存系统
 3 | 2/kafka集群中的服务器都叫做broker
 4 | 3/kafka有两类客户端，一类叫producer（消息生产者），一类叫做consumer（消息消费者），客户端和broker服务器之间采用tcp协议连接
 5 | 4/kafka中不同业务系统的消息可以通过topic进行区分，而且每一个消息topic都会被分区，以分担消息读写的负载
 6 | 5/每一个分区都可以有多个副本，以防止数据的丢失
 7 | 6/某一个分区中的数据如果需要更新，都必须通过该分区所有副本中的leader来更新
 8 | 7/消费者可以分组，比如有两个消费者组A和B，共同消费一个topic：order_info,A和B所消费的消息不会重复
 9 | 比如 order_info 中有100个消息，每个消息有一个id,编号从0-99，那么，如果A组消费0-49号，B组就消费50-99号
10 | 8/消费者在具体消费某个topic中的消息时，可以指定起始偏移量
11 | 
12 | 
13 | 
14 | 
15 | 集群安装
16 | 1、解压
17 | cd /home/xxproject/lib
18 | tar -xzvf kafka_2.11-0.11.0.1.tgz
19 | ln -sf kafka_2.11-0.11.0.1 kafka
20 | 
21 | 
22 | 修改环境变量
23 | echo '
24 | # !!!No Modification, This Section is Auto Generated by ZooKeeper
25 | export KAFKA_HOME=/home/xxproject/lib/kafka
26 | export PATH=${PATH}:${KAFKA_HOME}/bin
27 | ' >> ~/.bash_profile
28 | source ~/.bash_profile
29 | 
30 | 
31 | 2、修改server.properties
32 | ===================================================================
33 | broker.id=1/2/3
34 | zookeeper.connect=node-01:2181,node-02:2181,node-03:2181
35 | ===================================================================
36 | 
37 | 3、将zookeeper集群启动
38 | 
39 | 4、在每一台节点上启动broker, node-02/3/4上分别启动，指定不同的配置文件
40 | # bin/kafka-server-start.sh config/server-1.properties
41 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-1.properties
42 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-2.properties
43 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-3.properties
44 | 
45 | 
46 | 5、在kafka集群中创建一个topic
47 | # bin/kafka-topics.sh --create --zookeeper weekend05:2181 --replication-factor 3 --partitions 1 --topic order
48 | kafka-topics.sh --create --zookeeper 'node-01:2181,node-02:2181,node-03:2181' --replication-factor 3 --partitions 1 --topic order-r
49 | kafka-topics.sh --list --zookeeper 'node-01:2181,node-02:2181,node-03:2181'
50 | kafka-topics.sh --describe --zookeeper 'node-01:2181,node-02:2181,node-03:2181'
51 | 
52 | 6、用一个producer向某一个topic中写入消息
53 | # bin/kafka-console-producer.sh --broker-list weekend:9092 --topic order
54 | kafka-console-producer.sh --broker-list node-02:9092 --topic order
55 | >>> This is a message
56 | >>> This is another message
57 | 
58 | kafka-console-producer.sh --broker-list node-02:9092,node-03:9092,node-04:9092 --topic order-r
59 | >>> This is a message
60 | >>> This is another message
61 | 
62 | 7、用一个comsumer从某一个topic中读取信息
63 | # bin/kafka-console-consumer.sh --zookeeper weekend05:2181 --from-beginning --topic order
64 | kafka-console-consumer.sh --bootstrap-server node-02:9092 --topic order --from-beginning
65 | kafka-console-consumer.sh --bootstrap-server node-02:9092,node-03:9092,node-04:9092 --topic order-r --from-beginning
66 | 
67 | 8、查看一个topic的分区及副本状态信息
68 | # bin/kafka-topics.sh --describe --zookeeper weekend05:2181 --topic order
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/conf_data/order.txt:
--------------------------------------------------------------------------------
1 | 0000101	iphone6plus	64G	6888
2 | 0000102	xiaominote	64G	2388
3 | 0000103	iphone5	64G	6888
4 | 0000104	xiaomi5	64G	2388
5 | 0000105	huawei	64G	6888
6 | 


--------------------------------------------------------------------------------
/conf_data/spark安装部署.txt:
--------------------------------------------------------------------------------
 1 | Spark参考博客：http://blog.csdn.net/lovehuangjiaju
 2 | 
 3 | 版本：2.2.0
 4 | #Pre-build with user-provided Hadoop: "Hadoop free" 版，可应用到任意 Hadoop 版本
 5 | #https://www.apache.org/dyn/closer.lua/spark/spark-2.2.0/spark-2.2.0-bin-without-hadoop.tgz
 6 | 
 7 | Pre-built for Apache Hadoop 2.7 and later 
 8 | https://www.apache.org/dyn/closer.lua/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
 9 | 
10 | 
11 | 安装Java==1.8.0_144、Scala==2.11.11
12 | tar -xzvf spark-2.2.0-bin-hadoop2.7.tgz
13 | ln -sf spark-2.2.0-bin-hadoop2.7 spark
14 | 
15 | 配置环境变量
16 | echo '
17 | # !!!No Modification, This Section is Auto Generated by Spark
18 | export SPARK_HOME=/home/xxproject/lib/spark
19 | export PATH=${PATH}:${SPARK_HOME}/bin
20 | ' >> ~/.bash_profile
21 | source ~/.bash_profile
22 | 
23 | 配置slaves节点
24 | cd spark/conf/
25 | cp slaves.template slaves
26 | vi slaves
27 | 
28 | # localhost
29 | node-01
30 | node-02
31 | node-03
32 | node-04
33 | 
34 | 配置spark启动环境变量
35 | cp spark-env.sh.template spark-env.sh
36 | vi spark-env.sh
37 | 
38 | export JAVA_HOME=/home/xxproject/lib/jdk
39 | # export SCALA_HOME=/home/xxproject/lib/scala
40 | export SPARK_MASTER_HOST=node-01
41 | export SPARK_MASTER_PORT=7077
42 | # export MASTER=spark://${SPARK_MASTER_HOST}:${SPARK_MASTER_PORT}
43 | export SPARK_WORKER_CORES=1
44 | # export SPARK_WORKER_INSTANCES=1
45 | export SPARK_WORKER_MEMORY=1g
46 | # export HADOOP_CONF_DIR=/opt/hadoop-2.7.3/etc/hadoop
47 | # export HADOOP_HOME=/home/hadoop/package/hadoop-2.7.2/etc/hadoop
48 | # export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath)
49 | 
50 | 启动spark集群：
51 | sbin/start-all.sh
52 | 
53 | 
54 | 查看界面：http://10.20.0.11:8080/
55 | 测试一下：
56 | cd /home/xxproject/lib/spark
57 | spark-submit --class org.apache.spark.examples.SparkPi --master spark://node-01:7077 --executor-memory 1G --total-executor-cores 1 /home/xxproject/lib/spark/examples/jars/spark-examples_2.11-2.2.0.jar 100
58 | 
59 | spark-shell测试：
60 | spark-shell --master spark://node-01:7077 --executor-memory 1G --total-executor-cores 1
61 | 
62 | val lines = sc.textFile("file:///home/xxproject/workspace/xxhadoop/spark_data/")
63 | val words = lines.flatMap(line => line.split(" ") )
64 | val wordCounts = words.map(word => (word, 1)).reduceByKey((a, b) => a + b)
65 | wordCounts.collect().foreach(println)
66 | wordCounts.partitions.length
67 | wordCounts.saveAsTextFile("file:///tmp/output")
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/conf_data/spark运行命令样例.txt:
--------------------------------------------------------------------------------
 1 | ﻿local单机模式：
 2 | 结果xshell可见：
 3 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master local[1] ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
 4 | 
 5 | standalone集群模式：
 6 | 需要的配置项
 7 | 1, slaves文件
 8 | 2, spark-env.sh
 9 | export JAVA_HOME=/usr/soft/jdk1.7.0_71
10 | export SPARK_MASTER_IP=spark001
11 | export SPARK_MASTER_PORT=7077
12 | export SPARK_WORKER_CORES=1
13 | export SPARK_WORKER_INSTANCES=1
14 | export SPARK_WORKER_MEMORY=1g
15 | 
16 | standalone集群模式：
17 | 之client模式：
18 | 结果xshell可见：
19 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://spark001:7077 --executor-memory 1G --total-executor-cores 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
20 | 
21 | 
22 | standalone集群模式：
23 | 之cluster模式：
24 | 结果spark001:8080里面可见！
25 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://spark001:7077 --deploy-mode cluster --supervise --executor-memory 1G --total-executor-cores 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
26 | 
27 | Yarn集群模式：
28 | 需要的配置项
29 | 1, spark-env.sh
30 | export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
31 | export YARN_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
32 | export SPARK_HOME=/usr/hadoopsoft/spark-1.3.1-bin-hadoop2.4
33 | export SPARK_JAR=/usr/hadoopsoft/spark-1.3.1-bin-hadoop2.4/lib/spark-assembly-1.3.1-hadoop2.4.0.jar
34 | export PATH=$SPARK_HOME/bin:$PATH
35 | 2, ~/.bash_profile
36 | 配置好hadoop环境变量
37 | 
38 | Yarn集群模式：
39 | client模式：
40 | 结果xshell可见：
41 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-client --executor-memory 1G --num-executors 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
42 | 
43 | Yarn集群模式：
44 | cluster模式：
45 | 结果spark001:8088里面可见！
46 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-cluster --executor-memory 1G --num-executors 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
47 | 
48 | 


--------------------------------------------------------------------------------
/conf_data/storm安装配置/storm-trainning-v1.0-zs.ppt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/storm安装配置/storm-trainning-v1.0-zs.ppt


--------------------------------------------------------------------------------
/conf_data/storm安装配置/storm.yaml:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one
 2 | # or more contributor license agreements.  See the NOTICE file
 3 | # distributed with this work for additional information
 4 | # regarding copyright ownership.  The ASF licenses this file
 5 | # to you under the Apache License, Version 2.0 (the
 6 | # "License"); you may not use this file except in compliance
 7 | # with the License.  You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | ########### These MUST be filled in for a storm configuration
18 | 
19 | storm.zookeeper.servers:
20 |     - "node-01"
21 |     - "node-02"
22 |     - "node-0381"
23 | storm.zookeeper.port: 2181
24 | nimbus.seeds: ["node-01"]
25 | 
26 | # ##### These may optionally be filled in:
27 | #    
28 | ## List of custom serializations
29 | # topology.kryo.register:
30 | #     - org.mycompany.MyType
31 | #     - org.mycompany.MyType2: org.mycompany.MyType2Serializer
32 | #
33 | ## List of custom kryo decorators
34 | # topology.kryo.decorators:
35 | #     - org.mycompany.MyDecorator
36 | #
37 | ## Locations of the drpc servers
38 | # drpc.servers:
39 | #     - "server1"
40 | #     - "server2"
41 | 
42 | ## Metrics Consumers
43 | ## max.retain.metric.tuples
44 | ## - task queue will be unbounded when max.retain.metric.tuples is equal or less than 0.
45 | ## whitelist / blacklist
46 | ## - when none of configuration for metric filter are specified, it'll be treated as 'pass all'.
47 | ## - you need to specify either whitelist or blacklist, or none of them. You can't specify both of them.
48 | ## - you can specify multiple whitelist / blacklist with regular expression
49 | ## expandMapType: expand metric with map type as value to multiple metrics
50 | ## - set to true when you would like to apply filter to expanded metrics
51 | ## - default value is false which is backward compatible value
52 | ## metricNameSeparator: separator between origin metric name and key of entry from map
53 | ## - only effective when expandMapType is set to true
54 | # topology.metrics.consumer.register:
55 | #   - class: "org.apache.storm.metric.LoggingMetricsConsumer"
56 | #     max.retain.metric.tuples: 100
57 | #     parallelism.hint: 1
58 | #   - class: "org.mycompany.MyMetricsConsumer"
59 | #     max.retain.metric.tuples: 100
60 | #     whitelist:
61 | #       - "execute.*"
62 | #       - "^__complete-latency$"
63 | #     parallelism.hint: 1
64 | #     argument:
65 | #       - endpoint: "metrics-collector.mycompany.org"
66 | #     expandMapType: true
67 | #     metricNameSeparator: "."
68 | 
69 | ## Cluster Metrics Consumers
70 | # storm.cluster.metrics.consumer.register:
71 | #   - class: "org.apache.storm.metric.LoggingClusterMetricsConsumer"
72 | #   - class: "org.mycompany.MyMetricsConsumer"
73 | #     argument:
74 | #       - endpoint: "metrics-collector.mycompany.org"
75 | #
76 | # storm.cluster.metrics.consumer.publish.interval.secs: 60


--------------------------------------------------------------------------------
/conf_data/storm安装配置/storm安装手册及笔记.txt:
--------------------------------------------------------------------------------
 1 | ﻿1、安装一个zookeeper集群
 2 | 
 3 | 2、上传storm的安装包，解压
 4 | /home/xxproject/lib
 5 | tar -xzvf apache-storm-1.1.1.tar.gz
 6 | ln -sf apache-storm-1.1.1 storm
 7 | 
 8 | 3、修改配置文件storm.yaml
 9 | ======================================================
10 | storm.zookeeper.servers:
11 |     - "node-01"
12 |     - "node-02"
13 |     - "node-0381"
14 | storm.zookeeper.port: 2181
15 | nimbus.seeds: ["node-01"]
16 | ======================================================
17 | 
18 | #所使用的zookeeper集群主机
19 | storm.zookeeper.servers:
20 |      - "weekend05"
21 |      - "weekend06"
22 |      - "weekend07"
23 | 
24 | #nimbus所在的主机名
25 | nimbus.host: "weekend05"
26 | 
27 | 
28 | 配置环境变量：
29 | echo '
30 | # !!!No Modification, This Section is Auto Generated by ZooKeeper
31 | export STORM_HOME=/home/xxproject/lib/storm
32 | export PATH=${PATH}:${STORM_HOME}/bin
33 | ' >> ~/.bash_profile
34 | source ~/.bash_profile
35 | 
36 | 
37 | 最多启动5个Worker进程的意思，默认是4个，暂时不需要调整
38 | supervisor.slots.ports
39 | -6701
40 | -6702
41 | -6703
42 | -6704
43 | -6705
44 | 
45 | 启动storm 
46 | 在nimbus主机上，node-01节点
47 | nohup storm nimbus 1>/dev/null 2>&1 &
48 | nohup storm ui 1>/dev/null 2>&1 &
49 | 
50 | 访问： http://10.20.0.11:8080/index.html
51 | 
52 | 在supervisor主机上， node-02/3等两个节点上面都启动
53 | nohup storm supervisor 1>/dev/null 2>&1 &
54 | 
55 | 
56 | storm的深入学习：
57 | 分布式共享锁的实现
58 | 事务topology的实现机制及开发模式
59 | 在具体场景中的跟其他框架的整合（flume/activeMQ/kafka(分布式的消息队列系统)       /redis/hbase/mysql cluster）
60 | 
61 | 遗留问题：
62 | 对事物的支持
63 | 
64 | 
65 | 
66 | 
67 | 提交任务：
68 | storm jar storm.jar com.xcompany.xproject.storm.TestTopo
69 | storm list
70 | storm kill brandNameTopo
71 | 
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/conf_data/udf.txt:
--------------------------------------------------------------------------------
 1 | 1389990045,http://www.163.com,2000
 2 | 1385566005,http://www.163.com,2000
 3 | 1385566005,http://www.163.com,2000
 4 | 1389990045,http://www.163.com,2000
 5 | 1390876045,http://www.163.com,2000
 6 | 1385566005,http://www.163.com,2000
 7 | 1390876045,http://www.163.com,2000
 8 | 1390876045,http://www.163.com,2000
 9 | 1389990045,http://www.163.com,2000
10 | 
11 | select myfunction(nbr),url,flow from t_flow;
12 | 
13 | 
14 | 1389990045	beijing	http://www.163.com	2000
15 | 1385566005,beijing	http://www.163.com	2000
16 | 1385566005,beijing	http://www.163.com	2000
17 | 1389990045,tianjing,http://www.163.com,2000
18 | 1390876045,tianjing,http://www.163.com,2000
19 | 1385566005,tianjing,http://www.163.com,2000
20 | 1390876045,beijing,http://www.163.com,2000
21 | 1390876045,nanjing,http://www.163.com,2000
22 | 1389990045,nanjing,http://www.163.com,2000
23 | 
24 | 
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/conf_data/udt.test.txt:
--------------------------------------------------------------------------------
 1 | 1389990045,http://www.163.com,2000
 2 | 1385566005,http://www.163.com,2000
 3 | 1385566005,http://www.163.com,2000
 4 | 1389990045,http://www.163.com,2000
 5 | 1390876045,http://www.163.com,2000
 6 | 1385566005,http://www.163.com,2000
 7 | 1390876045,http://www.163.com,2000
 8 | 1390876045,http://www.163.com,2000
 9 | 1389990045,http://www.163.com,2000
10 | 


--------------------------------------------------------------------------------
/conf_data/word-count.txt:
--------------------------------------------------------------------------------
1 | hello world
2 | hello tom
3 | hello jim
4 | hello kitty
5 | hello baby
6 | 


--------------------------------------------------------------------------------
/conf_data/zoo.cfg:
--------------------------------------------------------------------------------
 1 | # The number of milliseconds of each tick
 2 | tickTime=2000
 3 | # The number of ticks that the initial 
 4 | # synchronization phase can take
 5 | initLimit=10
 6 | # The number of ticks that can pass between 
 7 | # sending a request and getting an acknowledgement
 8 | syncLimit=5
 9 | # the directory where the snapshot is stored.
10 | # do not use /tmp for storage, /tmp here is just 
11 | # example sakes.
12 | # dataDir=/tmp/zookeeper
13 | dataDir=/home/xxproject/data/zookeeper
14 | # the port at which the clients will connect
15 | clientPort=2181
16 | # the maximum number of client connections.
17 | # increase this if you need to handle more clients
18 | #maxClientCnxns=60
19 | #
20 | # Be sure to read the maintenance section of the 
21 | # administrator guide before turning on autopurge.
22 | #
23 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
24 | #
25 | # The number of snapshots to retain in dataDir
26 | #autopurge.snapRetainCount=3
27 | # Purge task interval in hours
28 | # Set to "0" to disable auto purge feature
29 | #autopurge.purgeInterval=1
30 | 
31 | # Added By ZooKeeper
32 | server.1=node-01:2888:3888
33 | server.2=node-02:2888:3888
34 | server.3=node-03:2888:3888
35 | 
36 | 


--------------------------------------------------------------------------------
/conf_data/zoo_sample.cfg:
--------------------------------------------------------------------------------
 1 | # The number of milliseconds of each tick
 2 | tickTime=2000
 3 | # The number of ticks that the initial 
 4 | # synchronization phase can take
 5 | initLimit=10
 6 | # The number of ticks that can pass between 
 7 | # sending a request and getting an acknowledgement
 8 | syncLimit=5
 9 | # the directory where the snapshot is stored.
10 | # do not use /tmp for storage, /tmp here is just 
11 | # example sakes.
12 | dataDir=/tmp/zookeeper
13 | # the port at which the clients will connect
14 | clientPort=2181
15 | # the maximum number of client connections.
16 | # increase this if you need to handle more clients
17 | #maxClientCnxns=60
18 | #
19 | # Be sure to read the maintenance section of the 
20 | # administrator guide before turning on autopurge.
21 | #
22 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
23 | #
24 | # The number of snapshots to retain in dataDir
25 | #autopurge.snapRetainCount=3
26 | # Purge task interval in hours
27 | # Set to "0" to disable auto purge feature
28 | #autopurge.purgeInterval=1
29 | 


--------------------------------------------------------------------------------
/conf_data/非HA场景/core-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed under the Apache License, Version 2.0 (the "License");
 5 |   you may not use this file except in compliance with the License.
 6 |   You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License. See accompanying LICENSE file.
15 | -->
16 | 
17 | <!-- Put site-specific property overrides in this file. -->
18 | 
19 | <configuration>
20 |     <!-- 指定namenode的主机名和端口号，此处的端口号需要和hdfs-site.xml的dfs.namenode.rpc-address配置的端口相同 -->
21 |     <property>
22 |         <name>fs.defaultFS</name>
23 |         <value>hdfs://node-01:9000</value>
24 |         <description>The name of the default file system.</description>
25 |     </property>
26 |     <!-- 配置hadoop的临时目录，我们不需要把这个目录创建出来, 这里的路径默认是NameNode,DataNode,JournalNode等存放数据的公共目录-->  
27 |     <property>  
28 |         <name>hadoop.tmp.dir</name>  
29 |         <value>/home/xxproject/data/hadoop/tmp</value>  
30 |         <description>A base for other temporary directories.</description>    
31 |     </property>
32 | </configuration>
33 | 


--------------------------------------------------------------------------------
/conf_data/非HA场景/hdfs-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed under the Apache License, Version 2.0 (the "License");
 5 |   you may not use this file except in compliance with the License.
 6 |   You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License. See accompanying LICENSE file.
15 | -->
16 | 
17 | <!-- Put site-specific property overrides in this file. -->
18 | 
19 | <configuration>
20 |     <!-- 这里是secondary namenode的主机名、端口，可以通过HTTP/HTTPS访问 -->
21 |     <property>  
22 |         <name>dfs.namenode.secondary.http-address</name>  
23 |         <value>node-02:50090</value>  
24 |         <description>The secondary namenode http server address and port. </description>  
25 |     </property>
26 |     <property>  
27 |         <name>dfs.namenode.secondary.https-address</name>
28 |         <value>node-02:50091</value>
29 |         <description>The secondary namenode HTTPS server address and port. </description> 
30 |     </property> 
31 |     <!-- 配置web UI 访问hdfs系统的地址-->  
32 |     <property>  
33 |         <name>dfs.namenode.http-address</name>  
34 |         <value>node-01:50070</value>  
35 |         <description>The address and the base port where the dfs namenode web ui will listen on. </description>  
36 |     </property>   
37 |     <!-- 设置HDFS的副本数 -->  
38 |     <property>  
39 |         <name>dfs.replication</name>  
40 |         <value>3</value>  
41 |     </property>
42 | </configuration>
43 | 


--------------------------------------------------------------------------------
/conf_data/非HA场景/mapred-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <!--
 4 |   Licensed under the Apache License, Version 2.0 (the "License");
 5 |   you may not use this file except in compliance with the License.
 6 |   You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License. See accompanying LICENSE file.
15 | -->
16 | 
17 | <!-- Put site-specific property overrides in this file. -->
18 | 
19 | <configuration>
20 |     <!-- 指定MR资源调度方式 -->  
21 |     <property>  
22 |         <name>mapreduce.framework.name</name>  
23 |         <value>yarn</value>  
24 |         <description>The runtime framework for executing MapReduce jobs. Can be one of local, classic or yarn.</description>  
25 |     </property>
26 |     
27 |     <!-- MR默认需要的资源数 -->  
28 |     <property>  
29 |         <name>yarn.app.mapreduce.am.resource.mb</name>  
30 |         <value>1536</value>  
31 |         <description>The amount of memory the MR AppMaster needs.</description>  
32 |     </property>
33 |     <property>  
34 |         <name>yarn.app.mapreduce.am.resource.cpu-vcores</name>  
35 |         <value>1</value>  
36 |         <description>The number of virtual CPU cores the MR AppMaster needs.</description>  
37 |     </property>
38 |     
39 |     <!-- 日志收集 -->
40 |     <!-- <property>
41 |         <name>mapreduce.jobtracker.address</name>
42 |         <value>node-01:9001</value>
43 |    </property> -->
44 | </configuration>
45 | 


--------------------------------------------------------------------------------
/conf_data/非HA场景/masters:
--------------------------------------------------------------------------------
1 | node-02
2 | 


--------------------------------------------------------------------------------
/conf_data/非HA场景/slaves:
--------------------------------------------------------------------------------
1 | node-02
2 | node-03
3 | node-04
4 | 
5 | 


--------------------------------------------------------------------------------
/conf_data/非HA场景/yarn-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!--
 3 |   Licensed under the Apache License, Version 2.0 (the "License");
 4 |   you may not use this file except in compliance with the License.
 5 |   You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 |   Unless required by applicable law or agreed to in writing, software
10 |   distributed under the License is distributed on an "AS IS" BASIS,
11 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |   See the License for the specific language governing permissions and
13 |   limitations under the License. See accompanying LICENSE file.
14 | -->
15 | <configuration>
16 |     <!-- Site specific YARN configuration properties -->
17 |     <!--Resourcemanager的配置,自定ResourceManager的地址，还是单点，这是隐患-->  
18 |     <property>  
19 |         <name>yarn.resourcemanager.hostname</name>  
20 |         <value>node-01</value>  
21 |     </property>  
22 |   
23 |     <!--配置resourcemanager的http地址 The http address of the RM web application.-->  
24 |     <property>  
25 |         <name>yarn.resourcemanager.webapp.address</name>  
26 |         <value>node-01:8088</value>  
27 |     </property>  
28 |   
29 |     <!--配置resourcemanager的https地址 The https adddress of the RM web application.-->  
30 |     <property>  
31 |         <name>yarn.resourcemanager.webapp.https.address</name>  
32 |         <value>node-01:8090</value>  
33 |     </property>  
34 |   
35 |     <!-- 在NM上还可以扩展自己的服务，yarn提供了一个yarn.nodemanager.aux-services的配置项，通过该配置，用户可以自定义一些服务，例如Map-Reduce的shuffle功能就是采用这种方式实现的 -->  
36 |     <property>  
37 |         <name>yarn.nodemanager.aux-services</name>  
38 |         <value>mapreduce_shuffle</value>  
39 |     </property>
40 |     
41 |     <!--新版本需要配置可用的资源数, Amount of physical memory, in MB, that can be allocated for containers.-->  
42 |     <property>
43 |         <name>yarn.nodemanager.resource.memory-mb</name>
44 |         <value>1536</value>
45 |     </property>
46 |     <property>
47 |         <name>yarn.nodemanager.resource.cpu-vcores</name>
48 |         <value>1</value>
49 |     </property>
50 |     
51 |     <!-- 日志收集 -->
52 |     <!-- <property>
53 |         <name>yarn.log-aggregation-enable</name>
54 |         <value>true</value>
55 |         <description>Configuration to enable or disable log aggregation</description>
56 |     </property>
57 |     <property>
58 |         <name>yarn.nodemanager.remote-app-log-dir</name>
59 |         <value>/home/xxproject/logs/hadoop</value>
60 |         <description>HDFS directory where the application logs are moved on application completion. Need to set appropriate permissions. Only applicable if log-aggregation is enabled. The default value is "/logs" or "/tmp/logs" </description>
61 |     </property> -->
62 | 
63 | </configuration>
64 | 


--------------------------------------------------------------------------------
/data_analyze.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/data_analyze.jpg


--------------------------------------------------------------------------------
/data_analyze.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/data_analyze.png


--------------------------------------------------------------------------------
/hadoop.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/hadoop.jpg


--------------------------------------------------------------------------------
/hbase/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry including="**/*.java" kind="src" path="src/main/resources"/>
16 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
17 | 		<attributes>
18 | 			<attribute name="maven.pomderived" value="true"/>
19 | 		</attributes>
20 | 	</classpathentry>
21 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
22 | 	<classpathentry kind="output" path="target/classes"/>
23 | </classpath>
24 | 


--------------------------------------------------------------------------------
/hbase/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/hbase/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>hbase</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 


--------------------------------------------------------------------------------
/hbase/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 | 


--------------------------------------------------------------------------------
/hbase/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/hbase/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <project
 3 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
 4 | 	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 5 | 	<modelVersion>4.0.0</modelVersion>
 6 | 
 7 | 	<parent>
 8 | 		<groupId>com.xcompany.xproject</groupId>
 9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 
13 | 	<artifactId>hbase</artifactId>
14 | 
15 | 	<dependencies>
16 | 		<!-- <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> 
17 | 			<version>3.8.1</version> <scope>test</scope> </dependency> -->
18 | 		<dependency>
19 | 			<groupId>log4j</groupId>
20 | 			<artifactId>log4j</artifactId>
21 | 			<version>1.2.17</version>
22 | 		</dependency>
23 | 		<dependency>
24 | 			<groupId>org.apache.hbase</groupId>
25 | 			<artifactId>hbase-client</artifactId>
26 | 			<version>1.2.6</version>
27 | 		</dependency>
28 | 	</dependencies>
29 | </project>
30 | 


--------------------------------------------------------------------------------
/hbase/src/main/java/com/xcompany/xproject/hbase/App.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.hbase;
 2 | 
 3 | /**
 4 |  * Hello world!
 5 |  *
 6 |  */
 7 | public class App 
 8 | {
 9 |     public static void main( String[] args )
10 |     {
11 |         System.out.println( "Hello World!" );
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/hbase/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
 1 | ### direct log messages to stdout ###
 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
 3 | log4j.appender.stdout.Target = System.out
 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
 6 | 
 7 | ### direct messages to file test.log ###
 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
 9 | log4j.appender.file.File= ./log/hive.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 | 
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hive.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 | 
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 | 
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false  
28 | 
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 | 


--------------------------------------------------------------------------------
/hbase/src/test/java/com/xcompany/xproject/hbase/AppTest.java:
--------------------------------------------------------------------------------
 1 | //package com.xcompany.xproject.hbase;
 2 | //
 3 | //import junit.framework.Test;
 4 | //import junit.framework.TestCase;
 5 | //import junit.framework.TestSuite;
 6 | //
 7 | ///**
 8 | // * Unit test for simple App.
 9 | // */
10 | //public class AppTest 
11 | //    extends TestCase
12 | //{
13 | //    /**
14 | //     * Create the test case
15 | //     *
16 | //     * @param testName name of the test case
17 | //     */
18 | //    public AppTest( String testName )
19 | //    {
20 | //        super( testName );
21 | //    }
22 | //
23 | //    /**
24 | //     * @return the suite of tests being tested
25 | //     */
26 | //    public static Test suite()
27 | //    {
28 | //        return new TestSuite( AppTest.class );
29 | //    }
30 | //
31 | //    /**
32 | //     * Rigourous Test :-)
33 | //     */
34 | //    public void testApp()
35 | //    {
36 | //        assertTrue( true );
37 | //    }
38 | //}
39 | 


--------------------------------------------------------------------------------
/hdfs/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry including="**/*.java" kind="src" path="src/main/resources"/>
16 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
17 | 		<attributes>
18 | 			<attribute name="maven.pomderived" value="true"/>
19 | 		</attributes>
20 | 	</classpathentry>
21 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
22 | 		<attributes>
23 | 			<attribute name="maven.pomderived" value="true"/>
24 | 		</attributes>
25 | 	</classpathentry>
26 | 	<classpathentry kind="output" path="target/classes"/>
27 | </classpath>
28 | 


--------------------------------------------------------------------------------
/hdfs/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/hdfs/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>hdfs</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 


--------------------------------------------------------------------------------
/hdfs/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 | 


--------------------------------------------------------------------------------
/hdfs/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/hdfs/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 3 |   <parent>
 4 |     <artifactId>hadoop</artifactId>
 5 |     <groupId>com.xcompany.xproject</groupId>
 6 |     <version>1.0.0-RELEASE</version>
 7 |   </parent>
 8 |   <modelVersion>4.0.0</modelVersion>
 9 |   <artifactId>hdfs</artifactId>
10 |   <name>hdfs</name>
11 |   <dependencies>
12 |     <dependency>
13 |       <groupId>junit</groupId>
14 |       <artifactId>junit</artifactId>
15 |       <version>4.12</version>
16 |       <scope>test</scope>
17 |       <exclusions>
18 |         <exclusion>
19 |           <artifactId>hamcrest-core</artifactId>
20 |           <groupId>org.hamcrest</groupId>
21 |         </exclusion>
22 |       </exclusions>
23 |     </dependency>
24 |   </dependencies>
25 | </project>
26 | 
27 | 


--------------------------------------------------------------------------------
/hdfs/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <project
 3 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
 4 | 	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 5 | 	<modelVersion>4.0.0</modelVersion>
 6 | 
 7 | 	<parent>
 8 | 		<groupId>com.xcompany.xproject</groupId>
 9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 
13 | 	<artifactId>hdfs</artifactId>
14 | 	<name>hdfs</name>
15 | 
16 | 	<dependencies>
17 | 		<dependency>
18 |             <groupId>org.apache.hadoop</groupId>
19 |             <artifactId>hadoop-client</artifactId>
20 |             <version>2.7.4</version>
21 |         </dependency>
22 | 	</dependencies>
23 | </project>
24 | 


--------------------------------------------------------------------------------
/hdfs/src/main/java/com/xcompany/xproject/hdfs/App.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.hdfs;
 2 | 
 3 | /**
 4 |  * Hello world!
 5 |  *
 6 |  */
 7 | public class App 
 8 | {
 9 |     public static void main( String[] args )
10 |     {
11 |         System.out.println( "Hello World!" );
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/hdfs/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
 1 | ### direct log messages to stdout ###
 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
 3 | log4j.appender.stdout.Target = System.out
 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
 6 | 
 7 | ### direct messages to file test.log ###
 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
 9 | log4j.appender.file.File= ./log/hdfs.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 | 
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hdfs.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 | 
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 | 
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false  
28 | 
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 | 


--------------------------------------------------------------------------------
/hdfs/src/test/java/com/xcompany/xproject/hdfs/AppTest.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.hdfs;
 2 | 
 3 | import junit.framework.Test;
 4 | import junit.framework.TestCase;
 5 | import junit.framework.TestSuite;
 6 | 
 7 | /**
 8 |  * Unit test for simple App.
 9 |  */
10 | public class AppTest 
11 |     extends TestCase
12 | {
13 |     /**
14 |      * Create the test case
15 |      *
16 |      * @param testName name of the test case
17 |      */
18 |     public AppTest( String testName )
19 |     {
20 |         super( testName );
21 |     }
22 | 
23 |     /**
24 |      * @return the suite of tests being tested
25 |      */
26 |     public static Test suite()
27 |     {
28 |         return new TestSuite( AppTest.class );
29 |     }
30 | 
31 |     /**
32 |      * Rigourous Test :-)
33 |      */
34 |     public void testApp()
35 |     {
36 |         assertTrue( true );
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/hdfs/src/test/java/com/xcompany/xproject/hdfs/HDFSTest.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.hdfs;
 2 | 
 3 | import java.io.FileInputStream;
 4 | import java.io.FileNotFoundException;
 5 | import java.io.FileOutputStream;
 6 | import java.io.IOException;
 7 | 
 8 | import org.apache.commons.io.IOUtils;
 9 | import org.apache.hadoop.conf.Configuration;
10 | import org.apache.hadoop.fs.FSDataInputStream;
11 | import org.apache.hadoop.fs.FSDataOutputStream;
12 | import org.apache.hadoop.fs.FileSystem;
13 | import org.apache.hadoop.fs.LocatedFileStatus;
14 | import org.apache.hadoop.fs.Path;
15 | import org.apache.hadoop.fs.RemoteIterator;
16 | import org.junit.After;
17 | import org.junit.Before;
18 | import org.junit.Test;
19 | import org.slf4j.Logger;
20 | import org.slf4j.LoggerFactory;
21 | 
22 | public class HDFSTest {
23 | 	
24 | 	private static final Logger LOGGER = LoggerFactory.getLogger(HDFSTest.class);
25 | 	private FileSystem fs = null;
26 | 	
27 | 	@Before
28 | 	public void setUp() throws IOException {
29 | 		Configuration conf = new Configuration();
30 | 		conf.set("fs.defaultFS", "hdfs://node-01:9000");
31 | 		fs = FileSystem.get(conf);
32 | 	}
33 | 	
34 | 	@After
35 | 	public void tearDown() throws IOException {
36 | 		fs.close();
37 | 	}
38 | 	
39 | 	@Test
40 | 	public void testList() throws FileNotFoundException, IOException {
41 | 		Path f = new Path("/");
42 | 		RemoteIterator<LocatedFileStatus> files = fs.listFiles(f, true);
43 | 		while (files.hasNext()) {
44 | 			LocatedFileStatus file = (LocatedFileStatus) files.next();
45 | 			LOGGER.info("====={}", file.getPath());
46 | 		}
47 | 	}
48 | 	
49 | 	@Test
50 | 	public void testPut() throws IOException {
51 | 		Path f = new Path("/put-word-count.txt");
52 | 		FSDataOutputStream fsDataOutputStream = fs.create(f, true);
53 | 		FileInputStream fileInputStream = new FileInputStream("/home/xxproject/word-count.txt");
54 | 		IOUtils.copy(fileInputStream, fsDataOutputStream);
55 | 	}
56 | 	
57 | 	@Test
58 | 	public void testGet() throws IOException {
59 | 		Path f = new Path("/put/word-count.txt");
60 | 		FSDataInputStream fsDataInputStream = fs.open(f);
61 | 		FileOutputStream fileOutputStream = new FileOutputStream("/home/xxproject/get-word-count.txt");
62 | 		IOUtils.copy(fsDataInputStream, fileOutputStream);
63 | 	}
64 | }
65 | 


--------------------------------------------------------------------------------
/hive/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry including="**/*.java" kind="src" path="src/main/resources"/>
16 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
17 | 		<attributes>
18 | 			<attribute name="maven.pomderived" value="true"/>
19 | 		</attributes>
20 | 	</classpathentry>
21 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
22 | 		<attributes>
23 | 			<attribute name="maven.pomderived" value="true"/>
24 | 		</attributes>
25 | 	</classpathentry>
26 | 	<classpathentry kind="output" path="target/classes"/>
27 | </classpath>
28 | 


--------------------------------------------------------------------------------
/hive/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/hive/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>hive</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 


--------------------------------------------------------------------------------
/hive/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 | 


--------------------------------------------------------------------------------
/hive/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/hive/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 3 |   <parent>
 4 |     <artifactId>hadoop</artifactId>
 5 |     <groupId>com.xcompany.xproject</groupId>
 6 |     <version>1.0.0-RELEASE</version>
 7 |   </parent>
 8 |   <modelVersion>4.0.0</modelVersion>
 9 |   <artifactId>hive</artifactId>
10 |   <name>hive</name>
11 |   <build>
12 |     <finalName>${project.artifactId}-${project.version}</finalName>
13 |     <plugins>
14 |       <plugin>
15 |         <artifactId>maven-shade-plugin</artifactId>
16 |         <version>2.2</version>
17 |         <executions>
18 |           <execution>
19 |             <phase>package</phase>
20 |             <goals>
21 |               <goal>shade</goal>
22 |             </goals>
23 |             <configuration>
24 |               <filters>
25 |                 <filter>
26 |                   <artifact>*:*</artifact>
27 |                   <excludes>
28 |                     <exclude>META-INF/*.SF</exclude>
29 |                     <exclude>META-INF/*.DSA</exclude>
30 |                     <exclude>META-INF/*.RSA</exclude>
31 |                     <exclude>META-INF/MANIFEST.MF</exclude>
32 |                     <exclude>META-INF/log4j-provider.properties</exclude>
33 |                   </excludes>
34 |                 </filter>
35 |               </filters>
36 |               <transformers>
37 |                 <transformer>
38 |                   <resource>META-INF/spring.handlers</resource>
39 |                 </transformer>
40 |                 <transformer>
41 |                   <mainClass>com.xcompany.xproject.hive.Phone2Area</mainClass>
42 |                 </transformer>
43 |                 <transformer>
44 |                   <resource>META-INF/spring.schemas</resource>
45 |                 </transformer>
46 |               </transformers>
47 |             </configuration>
48 |           </execution>
49 |         </executions>
50 |       </plugin>
51 |     </plugins>
52 |   </build>
53 |   <dependencies>
54 |     <dependency>
55 |       <groupId>junit</groupId>
56 |       <artifactId>junit</artifactId>
57 |       <version>4.12</version>
58 |       <scope>test</scope>
59 |       <exclusions>
60 |         <exclusion>
61 |           <artifactId>hamcrest-core</artifactId>
62 |           <groupId>org.hamcrest</groupId>
63 |         </exclusion>
64 |       </exclusions>
65 |     </dependency>
66 |   </dependencies>
67 | </project>
68 | 
69 | 


--------------------------------------------------------------------------------
/hive/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <project
 3 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
 4 | 	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 5 | 	<modelVersion>4.0.0</modelVersion>
 6 | 
 7 | 	<parent>
 8 | 		<groupId>com.xcompany.xproject</groupId>
 9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 
13 | 	<artifactId>hive</artifactId>
14 | 
15 | 	<dependencies>
16 | 		<!-- <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> 
17 | 			<version>3.8.1</version> <scope>test</scope> </dependency> -->
18 | 		<!-- https://mvnrepository.com/artifact/org.apache.hive/hive-exec -->
19 | 		<dependency>
20 | 			<groupId>org.apache.hive</groupId>
21 | 			<artifactId>hive-exec</artifactId>
22 | 			<version>2.1.1</version>
23 | 		</dependency>
24 | 		<dependency>
25 | 			<groupId>log4j</groupId>
26 | 			<artifactId>log4j</artifactId>
27 | 			<version>1.2.17</version>
28 | 		</dependency>
29 | 		<!-- https://mvnrepository.com/artifact/xerces/xerces -->
30 | 		<!-- <dependency> <groupId>xerces</groupId> <artifactId>xerces</artifactId> 
31 | 			<version>2.4.0</version> </dependency> -->
32 | 		<!-- <dependency> <groupId>com.github.edwgiz</groupId> <artifactId>maven-shade-plugin.log4j2-cachefile-transformer</artifactId> 
33 | 			<version>2.1</version> </dependency> -->
34 | 
35 | 	</dependencies>
36 | 
37 | 	<build>
38 | 		<plugins>
39 | 			<plugin>
40 | 				<groupId>org.apache.maven.plugins</groupId>
41 | 				<artifactId>maven-shade-plugin</artifactId>
42 | 				<version>2.2</version>
43 | 				<executions>
44 | 					<execution>
45 | 						<phase>package</phase>
46 | 						<goals>
47 | 							<goal>shade</goal>
48 | 						</goals>
49 | 						<configuration>
50 | 							<filters>
51 | 								<filter>
52 | 									<artifact>*:*</artifact>
53 | 									<excludes>
54 | 										<exclude>META-INF/*.SF</exclude>
55 | 										<exclude>META-INF/*.DSA</exclude>
56 | 										<exclude>META-INF/*.RSA</exclude>
57 | 										<exclude>META-INF/MANIFEST.MF</exclude>
58 | 										<exclude>META-INF/log4j-provider.properties</exclude>
59 | 									</excludes>
60 | 								</filter>
61 | 							</filters>
62 | 
63 | 							<transformers>
64 | 								<transformer
65 | 									implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
66 | 									<resource>META-INF/spring.handlers</resource>
67 | 								</transformer>
68 | 								<transformer
69 | 									implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
70 | 									<mainClass>com.xcompany.xproject.hive.Phone2Area</mainClass>
71 | 								</transformer>
72 | 								<transformer
73 | 									implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
74 | 									<resource>META-INF/spring.schemas</resource>
75 | 								</transformer>
76 | 								<!-- <transformer implementation="com.github.edwgiz.mavenShadePlugin.log4j2CacheTransformer.PluginsCacheFileTransformer"> 
77 | 									</transformer> -->
78 | 							</transformers>
79 | 						</configuration>
80 | 					</execution>
81 | 				</executions>
82 | 			</plugin>
83 | 		</plugins>
84 | 		<finalName>${project.artifactId}-${project.version}</finalName>
85 | 	</build>
86 | 
87 | </project>
88 | 


--------------------------------------------------------------------------------
/hive/src/main/java/com/xcompany/xproject/hive/Phone2Area.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.hive;
 2 | 
 3 | import java.util.HashMap;
 4 | 
 5 | import org.apache.hadoop.hive.ql.exec.UDF;
 6 | import org.slf4j.Logger;
 7 | import org.slf4j.LoggerFactory;
 8 | 
 9 | /*
10 |  * mvn clean --projects=com.xcompany.xproject:hive
11 |  * mvn install --projects=com.xcompany.xproject:hive
12 |  * java -jar hive/target/hive-1.0.0-RELEASE.jar
13 | */
14 | public class Phone2Area extends UDF {
15 | 	
16 | 	private static final Logger LOGGER = LoggerFactory.getLogger(Phone2Area.class);
17 | 
18 | 	// Load Once, Speed Up
19 | 	private static HashMap<String, String> areaMap = new HashMap<String, String>();
20 | 
21 | 	private static void loadData() {
22 | 		areaMap.put("135", "beijing");
23 | 		areaMap.put("136", "shanghai");
24 | 		areaMap.put("137", "xian");
25 | 		areaMap.put("138", "wuhan");
26 | 	}
27 | 
28 | 	static {
29 | //		System.setProperty("log4j2.loggerContextFactory", "org.apache.logging.log4j.core.impl.Log4jContextFactory");
30 | 		loadData();
31 | 	}
32 | 
33 | 	public String evaluate(String phoneNum) {
34 | 		String preKey = phoneNum.substring(0,3);
35 | 		return (areaMap.get(preKey) == null) ? "other" : areaMap.get(preKey);
36 | 	}
37 | 	
38 | 	public static void main(String[] args) {
39 | 		Phone2Area phone2Area = new Phone2Area();
40 | 		LOGGER.error(phone2Area.evaluate("18665817689"));
41 | 	}
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/hive/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
 1 | ### direct log messages to stdout ###
 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
 3 | log4j.appender.stdout.Target = System.out
 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
 6 | 
 7 | ### direct messages to file test.log ###
 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
 9 | log4j.appender.file.File= ./log/hive.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 | 
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hive.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 | 
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 | 
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false  
28 | 
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 | 


--------------------------------------------------------------------------------
/hive/src/test/java/com/xcompany/xproject/hive/AppTest.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.hive;
 2 | 
 3 | import junit.framework.Test;
 4 | import junit.framework.TestCase;
 5 | import junit.framework.TestSuite;
 6 | 
 7 | /**
 8 |  * Unit test for simple App.
 9 |  */
10 | public class AppTest 
11 |     extends TestCase
12 | {
13 |     /**
14 |      * Create the test case
15 |      *
16 |      * @param testName name of the test case
17 |      */
18 |     public AppTest( String testName )
19 |     {
20 |         super( testName );
21 |     }
22 | 
23 |     /**
24 |      * @return the suite of tests being tested
25 |      */
26 |     public static Test suite()
27 |     {
28 |         return new TestSuite( AppTest.class );
29 |     }
30 | 
31 |     /**
32 |      * Rigourous Test :-)
33 |      */
34 |     public void testApp()
35 |     {
36 |         assertTrue( true );
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/kafka/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry including="**/*.java" kind="src" path="src/main/resources"/>
16 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
17 | 		<attributes>
18 | 			<attribute name="maven.pomderived" value="true"/>
19 | 		</attributes>
20 | 	</classpathentry>
21 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
22 | 		<attributes>
23 | 			<attribute name="maven.pomderived" value="true"/>
24 | 		</attributes>
25 | 	</classpathentry>
26 | 	<classpathentry kind="output" path="target/classes"/>
27 | </classpath>
28 | 


--------------------------------------------------------------------------------
/kafka/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/kafka/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>kafka</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 


--------------------------------------------------------------------------------
/kafka/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 | 


--------------------------------------------------------------------------------
/kafka/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/kafka/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <project
 3 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
 4 | 	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 5 | 	<modelVersion>4.0.0</modelVersion>
 6 | 
 7 | 	<parent>
 8 | 		<groupId>com.xcompany.xproject</groupId>
 9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 
13 | 	<artifactId>kafka</artifactId>
14 | 
15 | 
16 | 	<dependencies>
17 | 		<!-- <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> 
18 | 			<version>3.8.1</version> <scope>test</scope> </dependency> -->
19 | 		<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka_2.11 -->
20 | 		<dependency>
21 | 			<groupId>org.apache.kafka</groupId>
22 | 			<artifactId>kafka_2.11</artifactId>
23 | 			<version>0.11.0.1</version>
24 | 		</dependency>
25 | 
26 | 	</dependencies>
27 | </project>
28 | 
29 | 


--------------------------------------------------------------------------------
/kafka/src/main/java/com/xcompany/xproject/kafka/TestConsumer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.kafka;
 2 | 
 3 | import java.util.Arrays;
 4 | import java.util.Properties;
 5 | 
 6 | import org.apache.kafka.clients.consumer.Consumer;
 7 | import org.apache.kafka.clients.consumer.ConsumerRecord;
 8 | import org.apache.kafka.clients.consumer.ConsumerRecords;
 9 | import org.apache.kafka.clients.consumer.KafkaConsumer;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 | 
13 | public class TestConsumer {
14 |     
15 |     private static final Logger LOGGER = LoggerFactory.getLogger(TestConsumer.class);
16 |     
17 |     public static void main(String[] args) {
18 |         Properties properties = new Properties();
19 |         // bin/kafka-topics.sh
20 |         properties.put("zookeeper.connect", "node-01:2181,node-02:2181,node-03:2181");
21 |         // kafka-console-producer.sh
22 |         properties.put("metadata.broker.list", "node-02:9092,node-03:9092,node-04:9092");
23 |         properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
24 |         properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
25 |         // kafka-console-consumer.sh
26 |         properties.put("bootstrap.servers", "node-02:9092,node-03:9092,node-04:9092");
27 |         
28 |         // must sepc group.id
29 |         properties.put("group.id", "test-group-new");
30 |         properties.put("auto.offset.reset", "earliest");
31 |         
32 |         Consumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
33 |         consumer.subscribe(Arrays.asList("order-r"));
34 |         try {
35 |             while (true) {
36 |                 ConsumerRecords<String, String> records = consumer.poll(1000);   // ms
37 |                 for (ConsumerRecord<String, String> record : records) {
38 |                     LOGGER.info("offset = {}, key = {}, value = {}\n", record.offset(), record.key(), record.value());
39 |                 }
40 |             }
41 |         } catch (Exception e) {
42 |         } finally {
43 |             consumer.close();
44 |         }
45 |         
46 |     }
47 |     
48 | }
49 | 


--------------------------------------------------------------------------------
/kafka/src/main/java/com/xcompany/xproject/kafka/TestProducer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.kafka;
 2 | 
 3 | import java.util.Properties;
 4 | 
 5 | import org.apache.kafka.clients.producer.KafkaProducer;
 6 | import org.apache.kafka.clients.producer.Producer;
 7 | import org.apache.kafka.clients.producer.ProducerRecord;
 8 | import org.slf4j.Logger;
 9 | import org.slf4j.LoggerFactory;
10 | 
11 | 
12 | public class TestProducer {
13 |     
14 |     private static final Logger LOGGER = LoggerFactory.getLogger(TestProducer.class);
15 |     
16 |     public static void main(String[] args) {
17 |         Properties properties = new Properties();
18 |         // bin/kafka-topics.sh
19 |         properties.put("zookeeper.connect", "node-01:2181,node-02:2181,node-03:2181");
20 |         // kafka-console-producer.sh
21 |         properties.put("metadata.broker.list", "node-02:9092,node-03:9092,node-04:9092");
22 |         properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
23 |         properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
24 |         // kafka-console-consumer.sh
25 |         properties.put("bootstrap.servers", "node-02:9092,node-03:9092,node-04:9092");
26 |         
27 |         
28 |         Producer<String, String> producer = new KafkaProducer<String, String>(properties);
29 |         
30 |         LOGGER.info("roduce start...");
31 |         for (int i = 0; i < 100; i++) {
32 |             ProducerRecord<String, String> msg = new ProducerRecord<String, String>("order-r", "name", "Hello_XXX_" + i);
33 |             producer.send(msg);
34 |         }
35 |         producer.close();
36 |         LOGGER.info("produce end...");
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/kafka/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
 1 | ### direct log messages to stdout ###
 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
 3 | log4j.appender.stdout.Target = System.out
 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
 6 | 
 7 | ### direct messages to file test.log ###
 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
 9 | log4j.appender.file.File= ./log/hive.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 | 
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hive.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 | 
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 | 
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false  
28 | 
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 | 


--------------------------------------------------------------------------------
/mmdetection/1-mmdection安装使用记录.txt:
--------------------------------------------------------------------------------
 1 | # 0、mmdection各组件依赖版本
 2 | # 参考：https://mmdetection.readthedocs.io/en/latest/get_started.html
 3 | Linux or macOS (Windows is in experimental support)
 4 | Python 3.6+：3.7.4
 5 | PyTorch 1.3+：1.4
 6 | CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible):10.1
 7 | GCC 5+
 8 | MMCV
 9 | 
10 | # 1、参考：https://phoenixnap.com/kb/how-to-install-anaconda-ubuntu-18-04-or-20-04
11 | # curl –O https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh
12 | wget https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh
13 | bash Anaconda3-2020.02-Linux-x86_64.sh
14 | 
15 | # 2、Create a conda virtual environment and activate it
16 | conda create -n open-mmlab python=3.7.4 -y
17 | conda activate open-mmlab
18 | 
19 | # 3、Install PyTorch and torchvision following the official instructions
20 | # https://pytorch.org/get-started/locally/#windows-pip
21 | conda install pytorch=1.6.0 cudatoolkit=10.1 torchvision==0.7.0 -c pytorch -y
22 | 
23 | import torch
24 | x = torch.rand(5, 3)
25 | print(x)
26 | import torch
27 | torch.cuda.is_available()
28 | 
29 | # 4、Install mmcv-full, we recommend you to install the pre-build package as below
30 | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
31 | # pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html
32 | 
33 | #5、Clone the MMDetection repository.
34 | sudo apt-get -y install build-essential nghttp2 libnghttp2-dev libssl-dev
35 | git clone https://github.com/open-mmlab/mmdetection.git
36 | cd mmdetection
37 | 或者直接下载：wget https://github.com/open-mmlab/mmdetection/archive/v2.10.0.zip
38 | unzip mmdetection-2.10.0.zip
39 | mv mmdetection-2.10.0 mmdetection
40 | 
41 | # 6、Install build requirements and then install MMDetection.
42 | pip install -r requirements/build.txt
43 | pip install -v -e .  # or "python setup.py develop"
44 | 
45 | # 7、通过Docker镜像安装使用
46 | # 镜像仓库：https://hub.docker.com/search?q=mmdetection&type=image
47 | # We provide a Dockerfile to build an image. Ensure that you are using docker version >=19.03.
48 | # build an image with PyTorch 1.6, CUDA 10.1
49 | docker build -t mmdetection docker/
50 | docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmdetection/data mmdetection
51 | 
52 | # 8、验证环境是否安装成功
53 | import torch
54 | available_gpus = [torch.cuda.get_device_properties(i) for i in range(torch.cuda.device_count())]
55 | available_gpus
56 | 
57 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
58 | x = torch.tensor([1, 2, 3], device=device)
59 | print(x)
60 | 
61 | 
62 | from mmdet.apis import init_detector, inference_detector
63 | config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
64 | # download the checkpoint from model zoo and put it in `checkpoints/`
65 | # url: http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
66 | checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
67 | device = 'cuda:0'
68 | # init a detector
69 | model = init_detector(config_file, checkpoint_file, device=device)
70 | # inference the demo image
71 | inference_detector(model, 'demo/demo.jpg')
72 | 
73 | # 8、Azure的GPU服务器规格
74 | 规格：Standard_NC6s_v3
75 | CPU：6核，内存：112G
76 | GPU：1卡，显存：16G
77 | 
78 | # nvidia-smi报错的问题
79 | dkms status
80 | sudo apt-get install dkms
81 | sudo dkms install -m nvidia -v 410.78
82 | nvidia-smi


--------------------------------------------------------------------------------
/mmdetection/2-mmdection预测新数据.txt:
--------------------------------------------------------------------------------
 1 | # notebook安装新内核
 2 | # 参考文档：https://docs.microsoft.com/zh-cn/azure/machine-learning/how-to-run-jupyter-notebooks
 3 | conda install pip -y 
 4 | conda install notebook ipykernel -y
 5 | python -m ipykernel install --user --name open-mmlab --display-name "Python (open-mmlab)"
 6 | 
 7 | # 下载预训练的模型参数
 8 | cd mmdetection
 9 | mkdir -p checkpoints/
10 | wget http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
11 | conda activate open-mmlab
12 | 
13 | # 执行代码
14 | from mmdet.apis import init_detector, inference_detector
15 | import mmcv
16 | 
17 | # Specify the path to model config and checkpoint file
18 | config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
19 | checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
20 | 
21 | # build the model from a config file and a checkpoint file
22 | model = init_detector(config_file, checkpoint_file, device='cuda:0')
23 | 
24 | # test a single image and show the results
25 | img = 'demo/demo.jpg'  # or img = mmcv.imread(img), which will only load it once
26 | result = inference_detector(model, img)
27 | # visualize the results in a new window
28 | model.show_result(img, result)
29 | # or save the visualization results to image files
30 | model.show_result(img, result, out_file='result.jpg')
31 | 
32 | # test a video and show the results
33 | video = mmcv.VideoReader('demo/demo.mp4')
34 | for frame in video:
35 |     result = inference_detector(model, frame)
36 |     model.show_result(frame, result, wait_time=1)
37 | 
38 | # AML的notrbook CPU版本
39 | from mmdet.apis import init_detector, inference_detector, show_result_pyplot
40 | import mmcv
41 | 
42 | # Specify the path to model config and checkpoint file
43 | config_file = 'mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
44 | checkpoint_file = 'mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
45 | 
46 | # build the model from a config file and a checkpoint file
47 | # model = init_detector(config_file, checkpoint_file, device='cuda:0')
48 | model = init_detector(config_file, checkpoint_file, device='cpu')    # 默认GPU改写成CPU
49 | 
50 | 
51 | 
52 | # test a single image and show the results
53 | img = 'mmdetection/demo/demo.jpg'  # or img = mmcv.imread(img), which will only load it once
54 | result = inference_detector(model, img)
55 | # visualize the results in a new window
56 | model.show_result(img, result)
57 | # or save the visualization results to image files
58 | model.show_result(img, result, out_file='mmdetection/result/result.jpg')
59 | 
60 | # show the results
61 | show_result_pyplot(model, img, result)
62 | 
63 | 
64 | # test a video and show the results
65 | video = mmcv.VideoReader('mmdetection/demo/demo.mp4')
66 | total_frame = 0
67 | for frame in video:
68 |     result = inference_detector(model, frame)
69 |     # show the results
70 |     show_result_pyplot(model, frame, result)
71 |     model.show_result(frame, result, wait_time=1)
72 |     total_frame += 1
73 | print(total_frame)
74 | 
75 | 
76 | !cd mmdetection \
77 |     && /anaconda/envs/open-mmlab/bin/python demo/image_demo.py \
78 |     demo/demo.jpg \
79 |     configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py \
80 |     checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
81 |     --device cpu
82 | 


--------------------------------------------------------------------------------
/mmdetection/3-mmdection模型指标测试.txt:
--------------------------------------------------------------------------------
 1 | # VOC 数据集下载（使用2007年数据集）
 2 | # https://pjreddie.com/projects/pascal-voc-dataset-mirror/
 3 | # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html
 4 | # https://cocodataset.org/
 5 | cd mmdetection
 6 | mkdir data
 7 | 
 8 | 
 9 | # 下载模型权重
10 | # https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc
11 | cd mmdetection
12 | mkdir -p checkpoints/
13 | wget http://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth
14 | 
15 | # Test Faster R-CNN on PASCAL VOC (without saving the test results) and evaluate the mAP. 
16 | # Config and checkpoint files are available here.
17 | !cd mmdetection \
18 |     && /anaconda/envs/open-mmlab/bin/python tools/test.py \
19 |     configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py \
20 |     checkpoints/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth \
21 |     --show-dir faster_rcnn_r50_fpn_1x_results/ \
22 |     --eval mAP recall
23 | 
24 | 
25 | 快速删除大文件夹、大文件
26 | mkdir -p blank
27 | rsync --delete-before -d blank/ VOCdevkit/
28 | 
29 | 


--------------------------------------------------------------------------------
/mmdetection/README:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/mr/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry including="**/*.java" kind="src" path="src/main/resources"/>
16 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
17 | 		<attributes>
18 | 			<attribute name="maven.pomderived" value="true"/>
19 | 		</attributes>
20 | 	</classpathentry>
21 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
22 | 		<attributes>
23 | 			<attribute name="maven.pomderived" value="true"/>
24 | 		</attributes>
25 | 	</classpathentry>
26 | 	<classpathentry kind="output" path="target/classes"/>
27 | </classpath>
28 | 


--------------------------------------------------------------------------------
/mr/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/mr/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>mr</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 


--------------------------------------------------------------------------------
/mr/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 | 


--------------------------------------------------------------------------------
/mr/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/mr/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 3 |   <parent>
 4 |     <artifactId>hadoop</artifactId>
 5 |     <groupId>com.xcompany.xproject</groupId>
 6 |     <version>1.0.0-RELEASE</version>
 7 |   </parent>
 8 |   <modelVersion>4.0.0</modelVersion>
 9 |   <artifactId>mr</artifactId>
10 |   <name>jar</name>
11 |   <dependencies>
12 |     <dependency>
13 |       <groupId>junit</groupId>
14 |       <artifactId>junit</artifactId>
15 |       <version>4.12</version>
16 |       <scope>test</scope>
17 |       <exclusions>
18 |         <exclusion>
19 |           <artifactId>hamcrest-core</artifactId>
20 |           <groupId>org.hamcrest</groupId>
21 |         </exclusion>
22 |       </exclusions>
23 |     </dependency>
24 |   </dependencies>
25 | </project>
26 | 
27 | 


--------------------------------------------------------------------------------
/mr/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <project
 3 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
 4 | 	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 5 | 	<modelVersion>4.0.0</modelVersion>
 6 | 	
 7 | 	<parent>
 8 | 		<groupId>com.xcompany.xproject</groupId>
 9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 	
13 | 	<artifactId>mr</artifactId>
14 | 	<name>jar</name>
15 | 
16 | 	<dependencies>
17 | 	   <dependency>
18 |             <groupId>org.apache.hadoop</groupId>
19 |             <artifactId>hadoop-client</artifactId>
20 |             <version>2.7.4</version>
21 |         </dependency>
22 | 	</dependencies>
23 | </project>
24 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/App.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr;
 2 | 
 3 | /**
 4 |  * Hello world!
 5 |  *
 6 |  */
 7 | public class App 
 8 | {
 9 |     public static void main( String[] args )
10 |     {
11 |         System.out.println( "Hello World!" );
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowBean.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowpartition;
 2 | 
 3 | import java.io.DataInput;
 4 | import java.io.DataOutput;
 5 | import java.io.IOException;
 6 | 
 7 | import org.apache.hadoop.io.Writable;
 8 | 
 9 | public class FlowBean implements Writable {
10 | 	
11 | 	private String phoneNum;
12 | 	private long upFlow;
13 | 	private long downFlow;
14 | 	private long sumFlow;
15 | 	
16 | 	
17 | 	public String getPhoneNum() {
18 | 		return phoneNum;
19 | 	}
20 | 	public void setPhoneNum(String phoneNum) {
21 | 		this.phoneNum = phoneNum;
22 | 	}
23 | 	public long getUpFlow() {
24 | 		return upFlow;
25 | 	}
26 | 	public void setUpFlow(long upFlow) {
27 | 		this.upFlow = upFlow;
28 | 	}
29 | 	public long getDownFlow() {
30 | 		return downFlow;
31 | 	}
32 | 	public void setDownFlow(long downFlow) {
33 | 		this.downFlow = downFlow;
34 | 	}
35 | 	public long getSumFlow() {
36 | 		return sumFlow;
37 | 	}
38 | 	public void setSumFlow(long sumFlow) {
39 | 		this.sumFlow = sumFlow;
40 | 	}
41 | 	
42 | //	@Override
43 | //	public String toString() {
44 | //		return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow
45 | //				+ ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]";
46 | //	}
47 | 	@Override
48 | 	public String toString() {
49 | 		return upFlow + "\t" + downFlow + "\t" + sumFlow;
50 | 	}
51 | 	
52 | 	public void write(DataOutput out) throws IOException {
53 | 		out.writeUTF(phoneNum);
54 | 		out.writeLong(upFlow);
55 | 		out.writeLong(downFlow);
56 | 		out.writeLong(sumFlow);
57 | 	}
58 | 	public void readFields(DataInput in) throws IOException {
59 | 		phoneNum = in.readUTF();
60 | 		upFlow = in.readLong();
61 | 		downFlow = in.readLong();
62 | 		sumFlow = in.readLong();
63 | 	}
64 | 	
65 | }
66 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartition.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowpartition;
 2 | 
 3 | import java.util.HashMap;
 4 | 
 5 | import org.apache.hadoop.mapreduce.Partitioner;
 6 | 
 7 | 
 8 | public class FlowPartition<K, V> extends Partitioner<K, V>{
 9 | 
10 | 	// Load Once, Speed Up
11 | 	private static HashMap<String, Integer> partitionMap = new HashMap<String, Integer>();
12 | 	private static void loadData() {
13 | 		partitionMap.put("135", 0);
14 | 		partitionMap.put("136", 1);
15 | 		partitionMap.put("137", 2);
16 | 		partitionMap.put("138", 3);
17 | 	}
18 | 	static {
19 | 		loadData();
20 | 	}
21 | 	
22 | 	@Override
23 | 	public int getPartition(K key, V value, int numPartitions) {
24 | 		//return 0;
25 | 		String preKey = key.toString().substring(0,3);
26 | 		return (partitionMap.get(preKey) == null) ? 4 : partitionMap.get(preKey);
27 | 	}
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionJob.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowpartition;
 2 | 
 3 | import java.util.Date;
 4 | 
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.conf.Configured;
 7 | import org.apache.hadoop.fs.Path;
 8 | import org.apache.hadoop.io.Text;
 9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.apache.hadoop.util.Tool;
14 | import org.apache.hadoop.util.ToolRunner;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 | 
18 | public class FlowPartitionJob extends Configured implements Tool {
19 | 	
20 | 	private static final Logger LOGGER = LoggerFactory.getLogger(FlowPartitionJob.class);
21 | 
22 | 	public static void main(String[] args) throws Exception {
23 | 		
24 | 		Date startTime = new Date();
25 | 		LOGGER.info("==========job started: " + startTime);
26 | 		int res = ToolRunner.run(new Configuration(), new FlowPartitionJob(), args);
27 | 		Date endTime = new Date();
28 | 		LOGGER.info("==========job ended: " + endTime);
29 | 		LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
30 | 		System.exit(res);
31 | 	}
32 | 
33 | 	public int run(String[] args) throws Exception {
34 | 		
35 | 	    /*Configuration conf = getConf();
36 | 	    JobClient client = new JobClient(conf);
37 | 	    ClusterStatus cluster = client.getClusterStatus();
38 | 	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
39 | 	    String join_reduces = conf.get(REDUCES_PER_HOST);
40 | 	    if (join_reduces != null) {
41 | 	       num_reduces = cluster.getTaskTrackers() *
42 | 	                       Integer.parseInt(join_reduces);
43 | 	    }
44 | 	    // Set user-supplied (possibly default) job configs
45 | 	    job.setNumReduceTasks(num_reduces);*/
46 | 	    
47 | 	    
48 | 		Configuration conf = new Configuration();
49 | 		//conf.set("fs.defaultFS", "hdfs://node-01:9000");
50 | 		String[] otherArgs = new GenericOptionsParser(conf, args)
51 | 				.getRemainingArgs();
52 | 
53 | 		String commaSeparatedPaths = null;
54 | 		String outputDir = null;
55 | 		if (otherArgs.length == 2) {
56 | 			commaSeparatedPaths = otherArgs[0];
57 | 			outputDir = otherArgs[1];
58 | 		} else {
59 | 			System.err.println("Usage: <in>[,<in>...] <out>");
60 | 			//System.exit(-1);
61 | 			return -1;
62 | 		}
63 | 		
64 | 
65 | 		Job job = Job.getInstance(conf);
66 | 		job.setJobName("FlowPartitionJob");
67 | 		job.setJarByClass(FlowPartitionJob.class);
68 | 		
69 | //		job.setInputFormatClass(TextInputFormat.class);
70 | //		job.setOutputFormatClass(TextOutputFormat.class);
71 | 		
72 | 		job.setMapperClass(FlowPartitionMapper.class);
73 | 		//job.setCombinerClass(WordCountReducer.class);
74 | 		job.setReducerClass(FlowPartitionReducer.class);
75 | 		
76 | 		job.setPartitionerClass(FlowPartition.class);
77 | 		job.setNumReduceTasks(5);
78 | 
79 | 		job.setOutputKeyClass(Text.class);
80 | 		job.setOutputValueClass(FlowBean.class);
81 | 		job.setMapOutputKeyClass(Text.class);
82 | 		job.setMapOutputValueClass(FlowBean.class);
83 | 
84 | 		FileInputFormat.setInputPaths(job, commaSeparatedPaths);
85 | 		FileOutputFormat.setOutputPath(job, new Path(outputDir));
86 | 
87 | 		return job.waitForCompletion(true) ? 0 : 1;
88 | 	}
89 | }
90 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionMapper.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowpartition;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.LongWritable;
 6 | import org.apache.hadoop.io.Text;
 7 | import org.apache.hadoop.mapreduce.Mapper;
 8 | import org.apache.hadoop.util.StringUtils;
 9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 | 
12 | public class FlowPartitionMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
13 | 	
14 | 	private static final Logger LOGGER = LoggerFactory.getLogger(FlowPartitionMapper.class);
15 | 	
16 | 	private String line = null;
17 | 	private final static char SEPARATOR = '\t';
18 | 	
19 | 	private String phoneNum = null;
20 | 	private long upFlow = 0;
21 | 	private long downFlow = 0;
22 | 	//private long sumFlow = 0;
23 | 	
24 | 	private Text text = new Text();
25 | 	private FlowBean flowBean = new FlowBean();
26 | 	
27 | 	@Override
28 | 	protected void map(LongWritable key, Text value,
29 | 			Mapper<LongWritable, Text, Text, FlowBean>.Context context)
30 | 			throws IOException, InterruptedException {
31 | 
32 | 		//super.map(key, value, context);
33 | 		line = value.toString();
34 | 		String[] fields = StringUtils.split(line, SEPARATOR);
35 | 		if (fields.length != 11) {
36 | 			LOGGER.error("invalid line: {}", line);
37 | 			System.err.println("invalid line: " + line);
38 | 		} else {
39 | 			phoneNum = fields[1];
40 | 			upFlow = Long.parseLong(fields[8]);
41 | 			downFlow = Long.parseLong(fields[9]);
42 | 			flowBean.setPhoneNum(phoneNum);
43 | 			flowBean.setUpFlow(upFlow);
44 | 			flowBean.setDownFlow(downFlow);
45 | 			//sumFlow = upFlow + downFlow;
46 | 			flowBean.setSumFlow(upFlow + downFlow);
47 | 			text.set(phoneNum);
48 | 			context.write(text, flowBean);
49 | 		}
50 | 		
51 | 	}
52 | }
53 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionReducer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowpartition;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.Text;
 6 | import org.apache.hadoop.mapreduce.Reducer;
 7 | 
 8 | public class FlowPartitionReducer extends Reducer<Text, FlowBean, Text, FlowBean>{
 9 | 	
10 | 	private FlowBean result = new FlowBean();
11 | 	
12 | 	@Override
13 | 	protected void reduce(Text key, Iterable<FlowBean> values,
14 | 			Reducer<Text, FlowBean, Text, FlowBean>.Context context)
15 | 			throws IOException, InterruptedException {
16 | 
17 | 		//super.reduce(arg0, arg1, arg2);
18 | 		long upFlow = 0;
19 | 		long downFlow = 0;
20 | 		//long flowSum = 0;
21 | 		for (FlowBean flowBean : values) {
22 | 			upFlow += flowBean.getUpFlow();
23 | 			downFlow += flowBean.getDownFlow();
24 | 			//flowSum += flowBean.getSumFlow();
25 | 		}
26 | 		result.setPhoneNum(key.toString());
27 | 		result.setUpFlow(upFlow);
28 | 		result.setDownFlow(downFlow);
29 | 		//result.setSumFlow(flowSum);
30 | 		result.setSumFlow(upFlow + downFlow);
31 | 		context.write(key, result);
32 | 	}
33 | }
34 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowBean.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowsort;
 2 | 
 3 | import java.io.DataInput;
 4 | import java.io.DataOutput;
 5 | import java.io.IOException;
 6 | 
 7 | import org.apache.hadoop.io.WritableComparable;
 8 | 
 9 | 
10 | public class FlowBean implements WritableComparable<FlowBean> {
11 | 
12 | 	private String phoneNum;
13 | 	private long upFlow;
14 | 	private long downFlow;
15 | 	private long sumFlow;
16 | 	
17 | 	
18 | 	public String getPhoneNum() {
19 | 		return phoneNum;
20 | 	}
21 | 	public void setPhoneNum(String phoneNum) {
22 | 		this.phoneNum = phoneNum;
23 | 	}
24 | 	public long getUpFlow() {
25 | 		return upFlow;
26 | 	}
27 | 	public void setUpFlow(long upFlow) {
28 | 		this.upFlow = upFlow;
29 | 	}
30 | 	public long getDownFlow() {
31 | 		return downFlow;
32 | 	}
33 | 	public void setDownFlow(long downFlow) {
34 | 		this.downFlow = downFlow;
35 | 	}
36 | 	public long getSumFlow() {
37 | 		return sumFlow;
38 | 	}
39 | 	public void setSumFlow(long sumFlow) {
40 | 		this.sumFlow = sumFlow;
41 | 	}
42 | 	
43 | //	@Override
44 | //	public String toString() {
45 | //		return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow
46 | //				+ ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]";
47 | //	}
48 | 	@Override
49 | 	public String toString() {
50 | 		return phoneNum + "\t" + upFlow + "\t" + downFlow + "\t" + sumFlow;
51 | 	}
52 | 	
53 | 	public void write(DataOutput out) throws IOException {
54 | 		out.writeUTF(phoneNum);
55 | 		out.writeLong(upFlow);
56 | 		out.writeLong(downFlow);
57 | 		out.writeLong(sumFlow);
58 | 	}
59 | 	public void readFields(DataInput in) throws IOException {
60 | 		phoneNum = in.readUTF();
61 | 		upFlow = in.readLong();
62 | 		downFlow = in.readLong();
63 | 		sumFlow = in.readLong();
64 | 	}
65 | 	public int compareTo(FlowBean o) {
66 | 		//return 0;
67 | 		// DESC
68 | 		long thisValue = this.sumFlow;
69 | 	    long thatValue = o.getSumFlow();
70 | 	    return (thisValue < thatValue ? 1 : (thisValue == thatValue ? 0 : -1));
71 | 	}
72 | 	
73 | }
74 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortJob.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowsort;
 2 | 
 3 | import java.util.Date;
 4 | 
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.conf.Configured;
 7 | import org.apache.hadoop.fs.Path;
 8 | import org.apache.hadoop.io.NullWritable;
 9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.apache.hadoop.util.Tool;
14 | import org.apache.hadoop.util.ToolRunner;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 | 
18 | public class FlowSortJob extends Configured implements Tool {
19 | 	
20 | 	private static final Logger LOGGER = LoggerFactory.getLogger(FlowSortJob.class);
21 | 
22 | 	public static void main(String[] args) throws Exception {
23 | 		
24 | 		Date startTime = new Date();
25 | 		LOGGER.info("==========job started: " + startTime);
26 | 		int res = ToolRunner.run(new Configuration(), new FlowSortJob(), args);
27 | 		Date endTime = new Date();
28 | 		LOGGER.info("==========job ended: " + endTime);
29 | 		LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
30 | 		System.exit(res);
31 | 	}
32 | 
33 | 	public int run(String[] args) throws Exception {
34 | 		
35 | 	    /*Configuration conf = getConf();
36 | 	    JobClient client = new JobClient(conf);
37 | 	    ClusterStatus cluster = client.getClusterStatus();
38 | 	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
39 | 	    String join_reduces = conf.get(REDUCES_PER_HOST);
40 | 	    if (join_reduces != null) {
41 | 	       num_reduces = cluster.getTaskTrackers() *
42 | 	                       Integer.parseInt(join_reduces);
43 | 	    }
44 | 	    // Set user-supplied (possibly default) job configs
45 | 	    job.setNumReduceTasks(num_reduces);*/
46 | 	    
47 | 	    
48 | 		Configuration conf = new Configuration();
49 | 		//conf.set("fs.defaultFS", "hdfs://node-01:9000");
50 | 		String[] otherArgs = new GenericOptionsParser(conf, args)
51 | 				.getRemainingArgs();
52 | 
53 | 		String commaSeparatedPaths = null;
54 | 		String outputDir = null;
55 | 		if (otherArgs.length == 2) {
56 | 			commaSeparatedPaths = otherArgs[0];
57 | 			outputDir = otherArgs[1];
58 | 		} else {
59 | 			System.err.println("Usage: <in>[,<in>...] <out>");
60 | 			//System.exit(-1);
61 | 			return -1;
62 | 		}
63 | 		
64 | 
65 | 		Job job = Job.getInstance(conf);
66 | 		job.setJobName("FlowSortJob");
67 | 		job.setJarByClass(FlowSortJob.class);
68 | 		
69 | 		job.setMapperClass(FlowSortMapper.class);
70 | 		//job.setCombinerClass(WordCountReducer.class);
71 | 		job.setReducerClass(FlowSortReducer.class);
72 | 
73 | 		job.setOutputKeyClass(FlowBean.class);
74 | 		job.setOutputValueClass(NullWritable.class);
75 | 		job.setMapOutputKeyClass(FlowBean.class);
76 | 		job.setMapOutputValueClass(NullWritable.class);
77 | 
78 | 		FileInputFormat.setInputPaths(job, commaSeparatedPaths);
79 | 		FileOutputFormat.setOutputPath(job, new Path(outputDir));
80 | 
81 | 		return job.waitForCompletion(true) ? 0 : 1;
82 | 	}
83 | }
84 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortMapper.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowsort;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.LongWritable;
 6 | import org.apache.hadoop.io.NullWritable;
 7 | import org.apache.hadoop.io.Text;
 8 | import org.apache.hadoop.mapreduce.Mapper;
 9 | import org.apache.hadoop.util.StringUtils;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 | 
13 | public class FlowSortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable> {
14 | 
15 | 	private static final Logger LOGGER = LoggerFactory.getLogger(FlowSortMapper.class);
16 | 	
17 | 	private FlowBean flowBean = new FlowBean();
18 | 	
19 | 	private String line = null;
20 | 	private final static char SEPARATOR = '\t';
21 | 	
22 | 	private String phoneNum = null;
23 | 	private long upFlow = 0;
24 | 	private long downFlow = 0;
25 | 	private long sumFlow = 0;
26 | 	
27 | 	@Override
28 | 	protected void map(LongWritable key, Text value,
29 | 			Mapper<LongWritable, Text, FlowBean, NullWritable>.Context context)
30 | 			throws IOException, InterruptedException {
31 | 
32 | 		//super.map(key, value, context);
33 | 		line = value.toString();
34 | 		String[] fields = StringUtils.split(line, SEPARATOR);
35 | 		if (fields.length != 4) {
36 | 			LOGGER.error("invalid line: {}", line);
37 | 			System.err.println("invalid line: " + line);
38 | 		} else {
39 | 			phoneNum = fields[0];
40 | 			upFlow = Long.parseLong(fields[1]);
41 | 			downFlow = Long.parseLong(fields[2]);
42 | 			sumFlow = Long.parseLong(fields[3]);
43 | 			flowBean.setPhoneNum(phoneNum);
44 | 			flowBean.setUpFlow(upFlow);
45 | 			flowBean.setDownFlow(downFlow);
46 | 			flowBean.setSumFlow(sumFlow);
47 | 			context.write(flowBean, NullWritable.get());
48 | 		}
49 | 	}
50 | }
51 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortReducer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowsort;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.NullWritable;
 6 | import org.apache.hadoop.mapreduce.Reducer;
 7 | 
 8 | public class FlowSortReducer extends Reducer<FlowBean, NullWritable, FlowBean, NullWritable>{
 9 | 	
10 | 	@Override
11 | 	protected void reduce(FlowBean key, Iterable<NullWritable> values,
12 | 			Reducer<FlowBean, NullWritable, FlowBean, NullWritable>.Context context)
13 | 			throws IOException, InterruptedException {
14 | 
15 | 		//super.reduce(arg0, arg1, arg2);
16 | 		context.write(key, NullWritable.get());
17 | 	}
18 | }
19 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowBean.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowsum;
 2 | 
 3 | import java.io.DataInput;
 4 | import java.io.DataOutput;
 5 | import java.io.IOException;
 6 | 
 7 | import org.apache.hadoop.io.Writable;
 8 | 
 9 | public class FlowBean implements Writable {
10 | 	
11 | 	private String phoneNum;
12 | 	private long upFlow;
13 | 	private long downFlow;
14 | 	private long sumFlow;
15 | 	
16 | 	
17 | 	public String getPhoneNum() {
18 | 		return phoneNum;
19 | 	}
20 | 	public void setPhoneNum(String phoneNum) {
21 | 		this.phoneNum = phoneNum;
22 | 	}
23 | 	public long getUpFlow() {
24 | 		return upFlow;
25 | 	}
26 | 	public void setUpFlow(long upFlow) {
27 | 		this.upFlow = upFlow;
28 | 	}
29 | 	public long getDownFlow() {
30 | 		return downFlow;
31 | 	}
32 | 	public void setDownFlow(long downFlow) {
33 | 		this.downFlow = downFlow;
34 | 	}
35 | 	public long getSumFlow() {
36 | 		return sumFlow;
37 | 	}
38 | 	public void setSumFlow(long sumFlow) {
39 | 		this.sumFlow = sumFlow;
40 | 	}
41 | 	
42 | //	@Override
43 | //	public String toString() {
44 | //		return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow
45 | //				+ ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]";
46 | //	}
47 | 	@Override
48 | 	public String toString() {
49 | 		return upFlow + "\t" + downFlow + "\t" + sumFlow;
50 | 	}
51 | 	
52 | 	public void write(DataOutput out) throws IOException {
53 | 		out.writeUTF(phoneNum);
54 | 		out.writeLong(upFlow);
55 | 		out.writeLong(downFlow);
56 | 		out.writeLong(sumFlow);
57 | 	}
58 | 	public void readFields(DataInput in) throws IOException {
59 | 		phoneNum = in.readUTF();
60 | 		upFlow = in.readLong();
61 | 		downFlow = in.readLong();
62 | 		sumFlow = in.readLong();
63 | 	}
64 | 	
65 | }
66 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumJob.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowsum;
 2 | 
 3 | import java.util.Date;
 4 | 
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.conf.Configured;
 7 | import org.apache.hadoop.fs.Path;
 8 | import org.apache.hadoop.io.Text;
 9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.apache.hadoop.util.Tool;
14 | import org.apache.hadoop.util.ToolRunner;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 | 
18 | public class FlowSumJob extends Configured implements Tool {
19 | 	
20 | 	private static final Logger LOGGER = LoggerFactory.getLogger(FlowSumJob.class);
21 | 
22 | 	public static void main(String[] args) throws Exception {
23 | 		
24 | 		Date startTime = new Date();
25 | 		LOGGER.info("==========job started: " + startTime);
26 | 		int res = ToolRunner.run(new Configuration(), new FlowSumJob(), args);
27 | 		Date endTime = new Date();
28 | 		LOGGER.info("==========job ended: " + endTime);
29 | 		LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
30 | 		System.exit(res);
31 | 	}
32 | 
33 | 	public int run(String[] args) throws Exception {
34 | 		
35 | 	    /*Configuration conf = getConf();
36 | 	    JobClient client = new JobClient(conf);
37 | 	    ClusterStatus cluster = client.getClusterStatus();
38 | 	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
39 | 	    String join_reduces = conf.get(REDUCES_PER_HOST);
40 | 	    if (join_reduces != null) {
41 | 	       num_reduces = cluster.getTaskTrackers() *
42 | 	                       Integer.parseInt(join_reduces);
43 | 	    }
44 | 	    // Set user-supplied (possibly default) job configs
45 | 	    job.setNumReduceTasks(num_reduces);*/
46 | 	    
47 | 	    
48 | 		Configuration conf = new Configuration();
49 | 		//conf.set("fs.defaultFS", "hdfs://node-01:9000");
50 | 		String[] otherArgs = new GenericOptionsParser(conf, args)
51 | 				.getRemainingArgs();
52 | 
53 | 		String commaSeparatedPaths = null;
54 | 		String outputDir = null;
55 | 		if (otherArgs.length == 2) {
56 | 			commaSeparatedPaths = otherArgs[0];
57 | 			outputDir = otherArgs[1];
58 | 		} else {
59 | 			System.err.println("Usage: <in>[,<in>...] <out>");
60 | 			//System.exit(-1);
61 | 			return -1;
62 | 		}
63 | 		
64 | 
65 | 		Job job = Job.getInstance(conf);
66 | 		job.setJobName("FlowSumJob");
67 | 		job.setJarByClass(FlowSumJob.class);
68 | 		
69 | 		job.setMapperClass(FlowSumMapper.class);
70 | 		//job.setCombinerClass(WordCountReducer.class);
71 | 		job.setReducerClass(FlowSumReducer.class);
72 | 
73 | 		job.setOutputKeyClass(Text.class);
74 | 		job.setOutputValueClass(FlowBean.class);
75 | 		job.setMapOutputKeyClass(Text.class);
76 | 		job.setMapOutputValueClass(FlowBean.class);
77 | 
78 | 		FileInputFormat.setInputPaths(job, commaSeparatedPaths);
79 | 		FileOutputFormat.setOutputPath(job, new Path(outputDir));
80 | 
81 | 		return job.waitForCompletion(true) ? 0 : 1;
82 | 	}
83 | }
84 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumMapper.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowsum;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.LongWritable;
 6 | import org.apache.hadoop.io.Text;
 7 | import org.apache.hadoop.mapreduce.Mapper;
 8 | import org.apache.hadoop.util.StringUtils;
 9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 | 
12 | public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
13 | 	
14 | 	private static final Logger LOGGER = LoggerFactory.getLogger(FlowSumMapper.class);
15 | 	
16 | 	private String line = null;
17 | 	private final static char SEPARATOR = '\t';
18 | 	
19 | 	private String phoneNum = null;
20 | 	private long upFlow = 0;
21 | 	private long downFlow = 0;
22 | 	//private long sumFlow = 0;
23 | 	
24 | 	private Text text = new Text();
25 | 	private FlowBean flowBean = new FlowBean();
26 | 	
27 | 	@Override
28 | 	protected void map(LongWritable key, Text value,
29 | 			Mapper<LongWritable, Text, Text, FlowBean>.Context context)
30 | 			throws IOException, InterruptedException {
31 | 
32 | 		//super.map(key, value, context);
33 | 		line = value.toString();
34 | 		String[] fields = StringUtils.split(line, SEPARATOR);
35 | 		if (fields.length != 11) {
36 | 			LOGGER.error("invalid line: {}", line);
37 | 			System.err.println("invalid line: " + line);
38 | 		} else {
39 | 			phoneNum = fields[1];
40 | 			upFlow = Long.parseLong(fields[8]);
41 | 			downFlow = Long.parseLong(fields[9]);
42 | 			flowBean.setPhoneNum(phoneNum);
43 | 			flowBean.setUpFlow(upFlow);
44 | 			flowBean.setDownFlow(downFlow);
45 | 			//sumFlow = upFlow + downFlow;
46 | 			flowBean.setSumFlow(upFlow + downFlow);
47 | 			text.set(phoneNum);
48 | 			context.write(text, flowBean);
49 | 		}
50 | 		
51 | 	}
52 | }
53 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumReducer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.flowsum;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.Text;
 6 | import org.apache.hadoop.mapreduce.Reducer;
 7 | 
 8 | public class FlowSumReducer extends Reducer<Text, FlowBean, Text, FlowBean>{
 9 | 	
10 | 	private FlowBean result = new FlowBean();
11 | 	
12 | 	@Override
13 | 	protected void reduce(Text key, Iterable<FlowBean> values,
14 | 			Reducer<Text, FlowBean, Text, FlowBean>.Context context)
15 | 			throws IOException, InterruptedException {
16 | 
17 | 		//super.reduce(arg0, arg1, arg2);
18 | 		long upFlow = 0;
19 | 		long downFlow = 0;
20 | 		//long flowSum = 0;
21 | 		for (FlowBean flowBean : values) {
22 | 			upFlow += flowBean.getUpFlow();
23 | 			downFlow += flowBean.getDownFlow();
24 | 			//flowSum += flowBean.getSumFlow();
25 | 		}
26 | 		result.setPhoneNum(key.toString());
27 | 		result.setUpFlow(upFlow);
28 | 		result.setDownFlow(downFlow);
29 | 		//result.setSumFlow(flowSum);
30 | 		result.setSumFlow(upFlow + downFlow);
31 | 		context.write(key, result);
32 | 	}
33 | }
34 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneJob.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.invertedindex;
 2 | 
 3 | import java.util.Date;
 4 | 
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.conf.Configured;
 7 | import org.apache.hadoop.fs.Path;
 8 | import org.apache.hadoop.io.LongWritable;
 9 | import org.apache.hadoop.io.Text;
10 | import org.apache.hadoop.mapreduce.Job;
11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
12 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
13 | import org.apache.hadoop.util.GenericOptionsParser;
14 | import org.apache.hadoop.util.Tool;
15 | import org.apache.hadoop.util.ToolRunner;
16 | import org.slf4j.Logger;
17 | import org.slf4j.LoggerFactory;
18 | 
19 | public class StepOneJob extends Configured implements Tool {
20 | 	
21 | 	private static final Logger LOGGER = LoggerFactory.getLogger(StepOneJob.class);
22 | 
23 | 	public static void main(String[] args) throws Exception {
24 | 		
25 | 		Date startTime = new Date();
26 | 		LOGGER.info("==========job started: " + startTime);
27 | 		int res = ToolRunner.run(new Configuration(), new StepOneJob(), args);
28 | 		Date endTime = new Date();
29 | 		LOGGER.info("==========job ended: " + endTime);
30 | 		LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
31 | 		System.exit(res);
32 | 	}
33 | 
34 | 	public int run(String[] args) throws Exception {
35 | 		
36 | 	    /*Configuration conf = getConf();
37 | 	    JobClient client = new JobClient(conf);
38 | 	    ClusterStatus cluster = client.getClusterStatus();
39 | 	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
40 | 	    String join_reduces = conf.get(REDUCES_PER_HOST);
41 | 	    if (join_reduces != null) {
42 | 	       num_reduces = cluster.getTaskTrackers() *
43 | 	                       Integer.parseInt(join_reduces);
44 | 	    }
45 | 	    // Set user-supplied (possibly default) job configs
46 | 	    job.setNumReduceTasks(num_reduces);*/
47 | 	    
48 | 	    
49 | 		Configuration conf = new Configuration();
50 | 		//conf.set("fs.defaultFS", "hdfs://node-01:9000");
51 | 		String[] otherArgs = new GenericOptionsParser(conf, args)
52 | 				.getRemainingArgs();
53 | 
54 | 		String commaSeparatedPaths = null;
55 | 		String outputDir = null;
56 | 		if (otherArgs.length == 2) {
57 | 			commaSeparatedPaths = otherArgs[0];
58 | 			outputDir = otherArgs[1];
59 | 		} else {
60 | 			System.err.println("Usage: <in>[,<in>...] <out>");
61 | 			//System.exit(-1);
62 | 			return -1;
63 | 		}
64 | 		
65 | 
66 | 		Job job = Job.getInstance(conf);
67 | 		job.setJobName("StepOneJob");
68 | 		job.setJarByClass(StepOneJob.class);
69 | 		
70 | //		job.setInputFormatClass(TextInputFormat.class);
71 | //		job.setOutputFormatClass(TextOutputFormat.class);
72 | 		
73 | 		job.setMapperClass(StepOneMapper.class);
74 | 		job.setCombinerClass(StepOneReducer.class);
75 | 		job.setReducerClass(StepOneReducer.class);
76 | 		
77 | //		job.setPartitionerClass(FlowPartition.class);
78 | //		job.setNumReduceTasks(5);
79 | 
80 | 		job.setOutputKeyClass(Text.class);
81 | 		job.setOutputValueClass(LongWritable.class);
82 | 		job.setMapOutputKeyClass(Text.class);
83 | 		job.setMapOutputValueClass(LongWritable.class);
84 | 
85 | 		FileInputFormat.setInputPaths(job, commaSeparatedPaths);
86 | 		FileOutputFormat.setOutputPath(job, new Path(outputDir));
87 | 
88 | 		return job.waitForCompletion(true) ? 0 : 1;
89 | 	}
90 | }
91 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneMapper.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.invertedindex;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.StringTokenizer;
 5 | 
 6 | import org.apache.hadoop.io.LongWritable;
 7 | import org.apache.hadoop.io.Text;
 8 | import org.apache.hadoop.mapreduce.Mapper;
 9 | import org.apache.hadoop.mapreduce.lib.input.FileSplit;
10 | 
11 | public class StepOneMapper extends Mapper<LongWritable, Text, Text, LongWritable>{
12 | 	
13 | //	private static final Logger LOGGER = LoggerFactory.getLogger(StepOneMapper.class);
14 | 	
15 | 	private final static char SEPARATOR = '\t';
16 | 	
17 | 	private Text text = new Text();
18 | 	private static final LongWritable ONE = new LongWritable(1L);
19 | 	
20 | 	@Override
21 | 	protected void map(LongWritable key, Text value,
22 | 			Mapper<LongWritable, Text, Text, LongWritable>.Context context)
23 | 			throws IOException, InterruptedException {
24 | 
25 | 		//super.map(key, value, context);
26 | 		StringTokenizer itr = new StringTokenizer(value.toString());
27 | 		while (itr.hasMoreTokens()) {
28 | 			text.set(itr.nextToken() + SEPARATOR + ((FileSplit)context.getInputSplit()).getPath().getName());
29 | 			context.write(text, ONE);	
30 | 		}
31 | 	}
32 | }
33 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneReducer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.invertedindex;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.LongWritable;
 6 | import org.apache.hadoop.io.Text;
 7 | import org.apache.hadoop.mapreduce.Reducer;
 8 | 
 9 | public class StepOneReducer extends Reducer<Text, LongWritable, Text, LongWritable>{
10 | 
11 | 	private LongWritable result = new LongWritable();
12 | 	
13 | 	@Override
14 | 	protected void reduce(Text key, Iterable<LongWritable> values,
15 | 			Reducer<Text, LongWritable, Text, LongWritable>.Context context)
16 | 			throws IOException, InterruptedException {
17 | 
18 | 		//super.reduce(arg0, arg1, arg2);
19 | 		long count = 0;
20 | 		for (LongWritable value : values) {
21 | 			count += value.get();
22 | 		}
23 | 		result.set(count);
24 | 		context.write(key, result);
25 | 	}
26 | }
27 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoJob.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.invertedindex;
 2 | 
 3 | import java.util.Date;
 4 | 
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.conf.Configured;
 7 | import org.apache.hadoop.fs.Path;
 8 | import org.apache.hadoop.io.Text;
 9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.apache.hadoop.util.Tool;
14 | import org.apache.hadoop.util.ToolRunner;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 | 
18 | public class StepTwoJob extends Configured implements Tool {
19 | 	
20 | 	private static final Logger LOGGER = LoggerFactory.getLogger(StepTwoJob.class);
21 | 
22 | 	public static void main(String[] args) throws Exception {
23 | 		
24 | 		Date startTime = new Date();
25 | 		LOGGER.info("==========job started: " + startTime);
26 | 		int res = ToolRunner.run(new Configuration(), new StepTwoJob(), args);
27 | 		Date endTime = new Date();
28 | 		LOGGER.info("==========job ended: " + endTime);
29 | 		LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
30 | 		System.exit(res);
31 | 	}
32 | 
33 | 	public int run(String[] args) throws Exception {
34 | 		
35 | 	    /*Configuration conf = getConf();
36 | 	    JobClient client = new JobClient(conf);
37 | 	    ClusterStatus cluster = client.getClusterStatus();
38 | 	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
39 | 	    String join_reduces = conf.get(REDUCES_PER_HOST);
40 | 	    if (join_reduces != null) {
41 | 	       num_reduces = cluster.getTaskTrackers() *
42 | 	                       Integer.parseInt(join_reduces);
43 | 	    }
44 | 	    // Set user-supplied (possibly default) job configs
45 | 	    job.setNumReduceTasks(num_reduces);*/
46 | 	    
47 | 	    
48 | 		Configuration conf = new Configuration();
49 | 		//conf.set("fs.defaultFS", "hdfs://node-01:9000");
50 | 		String[] otherArgs = new GenericOptionsParser(conf, args)
51 | 				.getRemainingArgs();
52 | 
53 | 		String commaSeparatedPaths = null;
54 | 		String outputDir = null;
55 | 		if (otherArgs.length == 2) {
56 | 			commaSeparatedPaths = otherArgs[0];
57 | 			outputDir = otherArgs[1];
58 | 		} else {
59 | 			System.err.println("Usage: <in>[,<in>...] <out>");
60 | 			//System.exit(-1);
61 | 			return -1;
62 | 		}
63 | 		
64 | 
65 | 		Job job = Job.getInstance(conf);
66 | 		job.setJobName("StepTwoJob");
67 | 		job.setJarByClass(StepTwoJob.class);
68 | 		
69 | //		job.setInputFormatClass(TextInputFormat.class);
70 | //		job.setOutputFormatClass(TextOutputFormat.class);
71 | 		
72 | 		job.setMapperClass(StepTwoMapper.class);
73 | //		job.setCombinerClass(StepOneReducer.class);
74 | 		job.setReducerClass(StepTwoReducer.class);
75 | 		
76 | //		job.setPartitionerClass(FlowPartition.class);
77 | //		job.setNumReduceTasks(5);
78 | 
79 | 		job.setOutputKeyClass(Text.class);
80 | 		job.setOutputValueClass(Text.class);
81 | 		job.setMapOutputKeyClass(Text.class);
82 | 		job.setMapOutputValueClass(Text.class);
83 | 
84 | 		FileInputFormat.setInputPaths(job, commaSeparatedPaths);
85 | 		FileOutputFormat.setOutputPath(job, new Path(outputDir));
86 | 
87 | 		return job.waitForCompletion(true) ? 0 : 1;
88 | 	}
89 | }
90 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoMapper.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.invertedindex;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.LongWritable;
 6 | import org.apache.hadoop.io.Text;
 7 | import org.apache.hadoop.mapreduce.Mapper;
 8 | import org.apache.hadoop.util.StringUtils;
 9 | 
10 | public class StepTwoMapper extends Mapper<LongWritable, Text, Text, Text> {
11 | 	
12 | 	private Text textKey = new Text();
13 | 	private Text textValue = new Text();
14 | 	
15 | 	private final static char SEPARATOR = '\t';
16 | 	private String line = null;
17 | 	private String word = null;
18 | 	private String fileName = null;
19 | 	private String count = null;
20 | 	
21 | 	@Override
22 | 	protected void map(LongWritable key, Text value,
23 | 			Mapper<LongWritable, Text, Text, Text>.Context context)
24 | 			throws IOException, InterruptedException {
25 | 
26 | 		//super.map(key, value, context);
27 | 		line = value.toString();
28 | 		String[] splits = StringUtils.split(line, SEPARATOR);
29 | 		word = splits[0];
30 | 		fileName = splits[1];
31 | 		count = splits[2];
32 | 		textKey.set(word);
33 | 		textValue.set(fileName + SEPARATOR + count);
34 | 		context.write(textKey, textValue);
35 | 	}
36 | }
37 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoReducer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.invertedindex;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.Text;
 6 | import org.apache.hadoop.mapreduce.Reducer;
 7 | import org.apache.hadoop.util.StringUtils;
 8 | 
 9 | public class StepTwoReducer extends Reducer<Text, Text, Text, Text>{
10 | 	
11 | 	private Text result = new Text();
12 | 	
13 | 	@Override
14 | 	protected void reduce(Text key, Iterable<Text> values,
15 | 			Reducer<Text, Text, Text, Text>.Context context) throws IOException,
16 | 			InterruptedException {
17 | 
18 | 		//super.reduce(arg0, arg1, arg2);
19 | 		result.set(StringUtils.join(";", values));
20 | 		context.write(key, result);
21 | 	}
22 | }
23 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountJob.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.wordcount;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.conf.Configuration;
 6 | import org.apache.hadoop.fs.Path;
 7 | import org.apache.hadoop.io.LongWritable;
 8 | import org.apache.hadoop.io.Text;
 9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.slf4j.Logger;
14 | import org.slf4j.LoggerFactory;
15 | 
16 | /*
17 |  * conf: copy hadoop conf to src/main/resources dir or exe jar on hadoop node
18 |  * export: wordcount.jar
19 |  * example: hadoop jar wordcount.jar com.xcompany.xproject.mr.wordcount.WordCountJob /word-count/input /word-count/output
20 | */
21 | public class WordCountJob {
22 | 
23 | 	private static final Logger LOGGER = LoggerFactory
24 | 			.getLogger(WordCountJob.class);
25 | 
26 | 	public static void main(String[] args) throws IOException,
27 | 			ClassNotFoundException, InterruptedException {
28 | 		
29 | 		Configuration conf = new Configuration();
30 | 		//conf.set("fs.defaultFS", "hdfs://node-01:9000");
31 | 		String[] otherArgs = new GenericOptionsParser(conf, args)
32 | 				.getRemainingArgs();
33 | 
34 | 		String commaSeparatedPaths = null;
35 | 		String outputDir = null;
36 | 		if (otherArgs.length == 2) {
37 | 			commaSeparatedPaths = otherArgs[0];
38 | 			outputDir = otherArgs[1];
39 | 		} else {
40 | 			System.err.println("Usage: <in>[,<in>...] <out>");
41 | 			System.exit(-1);
42 | 		}
43 | 
44 | 		LOGGER.info("==========job start");
45 | 		Job job = Job.getInstance(conf);
46 | 		job.setJobName("WordCountJob");
47 | 		job.setJarByClass(WordCountJob.class);
48 | 		
49 | 		job.setMapperClass(WordCountMapper.class);
50 | 		job.setCombinerClass(WordCountReducer.class);
51 | 		job.setReducerClass(WordCountReducer.class);
52 | 
53 | 		job.setOutputKeyClass(Text.class);
54 | 		job.setOutputValueClass(LongWritable.class);
55 | 		job.setMapOutputKeyClass(Text.class);
56 | 		job.setMapOutputValueClass(LongWritable.class);
57 | 
58 | 		FileInputFormat.setInputPaths(job, commaSeparatedPaths);
59 | 		FileOutputFormat.setOutputPath(job, new Path(outputDir));
60 | 
61 | 		if (job.waitForCompletion(true)) {
62 | 			LOGGER.info("==========job success");
63 | 		} else {
64 | 			LOGGER.info("==========job failed");
65 | 		}
66 | 	}
67 | }
68 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountMapper.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.wordcount;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.StringTokenizer;
 5 | 
 6 | import org.apache.hadoop.io.LongWritable;
 7 | import org.apache.hadoop.io.Text;
 8 | import org.apache.hadoop.mapreduce.Mapper;
 9 | 
10 | /*
11 |  * http://blog.csdn.net/boonya/article/details/54959393
12 |  * http://blog.csdn.net/guoery/article/details/8529004
13 |  * LongWritable: LineNumber
14 |  * Text        : LineString
15 |  * Text        : OutKey
16 |  * LongWritable: OutValue
17 | */
18 | public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable>{
19 | 	
20 | 	private final static LongWritable ONE = new LongWritable(1L);
21 | 	private Text word = new Text();
22 | 	
23 | 	@Override
24 | 	protected void map(LongWritable key, Text value,
25 | 			Mapper<LongWritable, Text, Text, LongWritable>.Context context)
26 | 			throws IOException, InterruptedException {
27 | 
28 | 		//super.map(key, value, context);
29 | 		StringTokenizer itr = new StringTokenizer(value.toString());
30 | 		while (itr.hasMoreTokens()) {
31 | 			word.set(itr.nextToken());
32 | 			context.write(word, ONE);
33 | 			
34 | 		}
35 | 	}
36 | }
37 | 


--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountReducer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr.wordcount;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.io.LongWritable;
 6 | import org.apache.hadoop.io.Text;
 7 | import org.apache.hadoop.mapreduce.Reducer;
 8 | 
 9 | public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable>{
10 | 
11 | 	private LongWritable result = new LongWritable();
12 | 	
13 | 	@Override
14 | 	protected void reduce(Text key, Iterable<LongWritable> values,
15 | 			Reducer<Text, LongWritable, Text, LongWritable>.Context context)
16 | 			throws IOException, InterruptedException {
17 | 
18 | 		//super.reduce(arg0, arg1, arg2);
19 | 		long count = 0;
20 | 		for (LongWritable value : values) {
21 | 			count += value.get();
22 | 		}
23 | 		result.set(count);
24 | 		context.write(key, result);
25 | 	}
26 | }
27 | 


--------------------------------------------------------------------------------
/mr/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
 1 | ### direct log messages to stdout ###
 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
 3 | log4j.appender.stdout.Target = System.out
 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
 6 | 
 7 | ### direct messages to file test.log ###
 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
 9 | log4j.appender.file.File= ./log/hdfs.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 | 
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hdfs.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 | 
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 | 
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false  
28 | 
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 | 


--------------------------------------------------------------------------------
/mr/src/test/java/com/xcompany/xproject/mr/AppTest.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.mr;
 2 | 
 3 | import junit.framework.Test;
 4 | import junit.framework.TestCase;
 5 | import junit.framework.TestSuite;
 6 | 
 7 | /**
 8 |  * Unit test for simple App.
 9 |  */
10 | public class AppTest 
11 |     extends TestCase
12 | {
13 |     /**
14 |      * Create the test case
15 |      *
16 |      * @param testName name of the test case
17 |      */
18 |     public AppTest( String testName )
19 |     {
20 |         super( testName );
21 |     }
22 | 
23 |     /**
24 |      * @return the suite of tests being tested
25 |      */
26 |     public static Test suite()
27 |     {
28 |         return new TestSuite( AppTest.class );
29 |     }
30 | 
31 |     /**
32 |      * Rigourous Test :-)
33 |      */
34 |     public void testApp()
35 |     {
36 |         assertTrue( true );
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 3 | 	<modelVersion>4.0.0</modelVersion>
 4 | 
 5 | 	<groupId>com.xcompany.xproject</groupId>
 6 | 	<artifactId>hadoop</artifactId>
 7 | 	<packaging>pom</packaging>
 8 | 	<version>1.0.0-RELEASE</version>
 9 | 
10 | 	<properties>
11 | 		<java.version>1.8</java.version>
12 | 		<hadoop.version>2.7.4</hadoop.version>
13 | 		
14 | 		<!-- <maven.compiler.source>1.8</maven.compiler.source>  
15 |         <maven.compiler.target>1.8</maven.compiler.target> -->  
16 | 	</properties>
17 | 
18 | 	<dependencies>
19 | 		<!-- https://mvnrepository.com/artifact/junit/junit -->
20 | 		<dependency>
21 | 			<groupId>junit</groupId>
22 | 			<artifactId>junit</artifactId>
23 | 			<version>4.12</version>
24 | 			<scope>test</scope>
25 | 			</dependency>
26 | 		<!-- <dependency>
27 | 			<groupId>org.apache.hadoop</groupId>
28 | 			<artifactId>hadoop-common</artifactId>
29 | 			<version>${hadoop.version}</version>
30 | 		</dependency>
31 | 		<dependency>
32 | 			<groupId>org.apache.hadoop</groupId>
33 | 			<artifactId>hadoop-hdfs</artifactId>
34 | 			<version>${hadoop.version}</version>
35 | 		</dependency> -->
36 | 		<!-- http://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client/2.7.4 -->
37 | 	</dependencies>
38 | 
39 | 	<!-- <build>
40 | 		<plugins>
41 | 			<plugin>
42 | 				<groupId>org.apache.maven.plugins</groupId>
43 | 				<artifactId>maven-shade-plugin</artifactId>
44 | 				<version>2.2</version>
45 | 				<executions>
46 | 					<execution>
47 | 						<phase>package</phase>
48 | 						<goals>
49 | 							<goal>shade</goal>
50 | 						</goals>
51 | 						<configuration>
52 | 							<transformers>
53 | 								<transformer
54 | 									implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
55 | 									<resource>META-INF/spring.handlers</resource>
56 | 								</transformer>
57 | 								<transformer
58 | 									implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
59 | 									<mainClass>com.chenzhou.examples.Main</mainClass>
60 | 								</transformer>
61 | 								<transformer
62 | 									implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
63 | 									<resource>META-INF/spring.schemas</resource>
64 | 								</transformer>
65 | 							</transformers>
66 | 						</configuration>
67 | 					</execution>
68 | 				</executions>
69 | 			</plugin>
70 | 		</plugins>
71 | 		<finalName>${project.artifactId}-${project.version}</finalName>
72 | 	</build> -->
73 | 
74 | 	<modules>
75 | 		<module>hdfs</module>
76 | 		<module>rpc</module>
77 | 		<module>mr</module>
78 | 		<module>hive</module>
79 | 		<module>hbase</module>
80 | 		<module>storm</module>
81 | 		<module>kafka</module>
82 | 		<module>storm-kafka</module>
83 | 		<module>scala</module>
84 | 	    <module>spark</module>
85 | 	    <module>sparkstreaming</module>
86 | 	    <module>spark-streaming</module>
87 |   </modules>
88 | </project>
89 | 


--------------------------------------------------------------------------------
/rpc/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
10 | 		<attributes>
11 | 			<attribute name="maven.pomderived" value="true"/>
12 | 		</attributes>
13 | 	</classpathentry>
14 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
15 | 		<attributes>
16 | 			<attribute name="optional" value="true"/>
17 | 			<attribute name="maven.pomderived" value="true"/>
18 | 		</attributes>
19 | 	</classpathentry>
20 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
21 | 		<attributes>
22 | 			<attribute name="maven.pomderived" value="true"/>
23 | 			<attribute name="org.eclipse.jst.component.dependency" value="/WEB-INF/lib"/>
24 | 		</attributes>
25 | 	</classpathentry>
26 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
27 | 		<attributes>
28 | 			<attribute name="maven.pomderived" value="true"/>
29 | 		</attributes>
30 | 	</classpathentry>
31 | 	<classpathentry kind="output" path="target/classes"/>
32 | </classpath>
33 | 


--------------------------------------------------------------------------------
/rpc/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/rpc/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>rpc</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.wst.jsdt.core.javascriptValidator</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.jdt.core.javabuilder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 		<buildCommand>
19 | 			<name>org.eclipse.wst.common.project.facet.core.builder</name>
20 | 			<arguments>
21 | 			</arguments>
22 | 		</buildCommand>
23 | 		<buildCommand>
24 | 			<name>org.eclipse.wst.validation.validationbuilder</name>
25 | 			<arguments>
26 | 			</arguments>
27 | 		</buildCommand>
28 | 		<buildCommand>
29 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
30 | 			<arguments>
31 | 			</arguments>
32 | 		</buildCommand>
33 | 	</buildSpec>
34 | 	<natures>
35 | 		<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
36 | 		<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
37 | 		<nature>org.eclipse.jdt.core.javanature</nature>
38 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
39 | 		<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
40 | 		<nature>org.eclipse.wst.jsdt.core.jsNature</nature>
41 | 	</natures>
42 | </projectDescription>
43 | 


--------------------------------------------------------------------------------
/rpc/.settings/.jsdtscope:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" path="src/main/webapp"/>
 4 | 	<classpathentry kind="src" path="target/m2e-wtp/web-resources"/>
 5 | 	<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.JRE_CONTAINER"/>
 6 | 	<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.WebProject">
 7 | 		<attributes>
 8 | 			<attribute name="hide" value="true"/>
 9 | 		</attributes>
10 | 	</classpathentry>
11 | 	<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.baseBrowserLibrary"/>
12 | 	<classpathentry kind="output" path=""/>
13 | </classpath>
14 | 


--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
4 | org.eclipse.jdt.core.compiler.compliance=1.5
5 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
8 | org.eclipse.jdt.core.compiler.source=1.5
9 | 


--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.common.component:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?><project-modules id="moduleCoreId" project-version="1.5.0">
 2 |     <wb-module deploy-name="rpc">
 3 |         <wb-resource deploy-path="/" source-path="/target/m2e-wtp/web-resources"/>
 4 |         <wb-resource deploy-path="/" source-path="/src/main/webapp" tag="defaultRootSource"/>
 5 |         <wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/java"/>
 6 |         <wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/resources"/>
 7 |         <wb-resource deploy-path="/WEB-INF/classes" source-path="/src/test/java"/>
 8 |         <property name="context-root" value="rpc"/>
 9 |         <property name="java-output-path" value="/rpc/target/classes"/>
10 |     </wb-module>
11 | </project-modules>
12 | 


--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml:
--------------------------------------------------------------------------------
1 | <root>
2 |   <facet id="jst.jaxrs">
3 |     <node name="libprov">
4 |       <attribute name="provider-id" value="jaxrs-no-op-library-provider"/>
5 |     </node>
6 |   </facet>
7 | </root>
8 | 


--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.common.project.facet.core.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <faceted-project>
3 |   <fixed facet="wst.jsdt.web"/>
4 |   <installed facet="java" version="1.5"/>
5 |   <installed facet="jst.web" version="2.3"/>
6 |   <installed facet="jst.jaxrs" version="1.1"/>
7 |   <installed facet="wst.jsdt.web" version="1.0"/>
8 | </faceted-project>
9 | 


--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.jsdt.ui.superType.container:
--------------------------------------------------------------------------------
1 | org.eclipse.wst.jsdt.launching.baseBrowserLibrary


--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.jsdt.ui.superType.name:
--------------------------------------------------------------------------------
1 | Window


--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.validation.prefs:
--------------------------------------------------------------------------------
1 | disabled=06target
2 | eclipse.preferences.version=1
3 | 


--------------------------------------------------------------------------------
/rpc/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 3 |   <parent>
 4 |     <artifactId>hadoop</artifactId>
 5 |     <groupId>com.xcompany.xproject</groupId>
 6 |     <version>1.0.0-RELEASE</version>
 7 |   </parent>
 8 |   <modelVersion>4.0.0</modelVersion>
 9 |   <artifactId>rpc</artifactId>
10 |   <dependencies>
11 |     <dependency>
12 |       <groupId>junit</groupId>
13 |       <artifactId>junit</artifactId>
14 |       <version>4.12</version>
15 |       <scope>test</scope>
16 |       <exclusions>
17 |         <exclusion>
18 |           <artifactId>hamcrest-core</artifactId>
19 |           <groupId>org.hamcrest</groupId>
20 |         </exclusion>
21 |       </exclusions>
22 |     </dependency>
23 |   </dependencies>
24 | </project>
25 | 
26 | 


--------------------------------------------------------------------------------
/rpc/logs/hdfs.log:
--------------------------------------------------------------------------------
 1 | [INFO ] 2017-12-08 10:31:32.239 [] [] [main] org.apache.hadoop.ipc.CallQueueManager.<init>(CallQueueManager.java:57) Using callQueue: class java.util.concurrent.LinkedBlockingQueue queueCapacity: 100
 2 | [INFO ] 2017-12-08 10:31:32.875 [] [] [Socket Reader #1 for port 8888] org.apache.hadoop.ipc.Server$Listener$Reader.run(Server.java:722) Starting Socket Reader #1 for port 8888
 3 | [WARN ] 2017-12-08 10:31:33.401 [] [] [main] org.apache.hadoop.util.NativeCodeLoader.<clinit>(NativeCodeLoader.java:62) Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
 4 | [INFO ] 2017-12-08 10:31:33.470 [] [] [main] com.xcompany.xproject.rpc.HelloServer.main(HelloServer.java:32) Server start to listen on 8888
 5 | [INFO ] 2017-12-08 10:31:33.486 [] [] [IPC Server listener on 8888] org.apache.hadoop.ipc.Server$Listener.run(Server.java:801) IPC Server listener on 8888: starting
 6 | [INFO ] 2017-12-08 10:31:33.487 [] [] [IPC Server Responder] org.apache.hadoop.ipc.Server$Responder.run(Server.java:962) IPC Server Responder: starting
 7 | [WARN ] 2017-12-08 10:31:50.842 [] [] [main] org.apache.hadoop.util.NativeCodeLoader.<clinit>(NativeCodeLoader.java:62) Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
 8 | [INFO ] 2017-12-08 10:31:51.734 [] [] [IPC Server handler 0 on 8888] com.xcompany.xproject.rpc.HelloServer.helloMethod(HelloServer.java:18) JunneYang
 9 | [INFO ] 2017-12-08 10:31:51.761 [] [] [main] com.xcompany.xproject.rpc.HelloClient.testHello(HelloClient.java:34) Hello JunneYang
10 | [INFO ] 2017-12-08 10:31:51.763 [] [] [IPC Server handler 0 on 8888] com.xcompany.xproject.rpc.HelloServer.helloMethod(HelloServer.java:18) Ares
11 | [INFO ] 2017-12-08 10:31:51.766 [] [] [main] com.xcompany.xproject.rpc.HelloClient.testHello(HelloClient.java:36) Hello Ares
12 | 


--------------------------------------------------------------------------------
/rpc/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <project
 3 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
 4 | 	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 5 | 	<modelVersion>4.0.0</modelVersion>
 6 | 	
 7 | 	<parent>
 8 | 		<groupId>com.xcompany.xproject</groupId>
 9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 	
13 | 	<artifactId>rpc</artifactId>
14 | 	<packaging>jar</packaging>
15 | 
16 | 	<dependencies>
17 | 	   <dependency>
18 |             <groupId>org.apache.hadoop</groupId>
19 |             <artifactId>hadoop-client</artifactId>
20 |             <version>2.7.4</version>
21 |         </dependency>
22 | 	</dependencies>
23 | 
24 | </project>
25 | 


--------------------------------------------------------------------------------
/rpc/src/main/java/com/xcompany/xproject/rpc/App.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.rpc;
 2 | 
 3 | /**
 4 |  * Hello world!
 5 |  *
 6 |  */
 7 | public class App 
 8 | {
 9 |     public static void main( String[] args )
10 |     {
11 |         System.out.println( "Hello World!" );
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/rpc/src/main/java/com/xcompany/xproject/rpc/HelloClient.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.rpc;
 2 | 
 3 | import java.io.IOException;
 4 | import java.net.InetSocketAddress;
 5 | 
 6 | import org.apache.hadoop.conf.Configuration;
 7 | import org.apache.hadoop.ipc.RPC;
 8 | import org.junit.After;
 9 | import org.junit.Before;
10 | import org.junit.Test;
11 | import org.slf4j.Logger;
12 | import org.slf4j.LoggerFactory;
13 | 
14 | public class HelloClient {
15 | 
16 | 	private static final Logger LOGGER = LoggerFactory.getLogger(HelloServer.class);
17 | 	
18 | 	@Before
19 | 	public void setUp() {
20 | 	}
21 | 	@After
22 | 	public void tearDown() {
23 | 	}
24 | 	
25 | 	@Test
26 | 	public void testHello() throws IOException {
27 | 		String bindAddress = "node-01";
28 | 		int port = 8888;
29 | 		InetSocketAddress addr = new InetSocketAddress(bindAddress, port);
30 | 		HelloProtocol proxy = RPC.getProxy(
31 | 				HelloProtocol.class, HelloProtocol.versionID, 
32 | 				addr, new Configuration());
33 | 		String resp = proxy.helloMethod("JunneYang");
34 | 		LOGGER.info(resp);
35 | 		resp = proxy.helloMethod("Ares");
36 | 		LOGGER.info(resp);
37 | 	}
38 | }
39 | 


--------------------------------------------------------------------------------
/rpc/src/main/java/com/xcompany/xproject/rpc/HelloProtocol.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.rpc;
2 | 
3 | public interface HelloProtocol {
4 | 	
5 | 	public static final long versionID = 1L;
6 | 	public String helloMethod(String name);
7 | 
8 | }
9 | 


--------------------------------------------------------------------------------
/rpc/src/main/java/com/xcompany/xproject/rpc/HelloServer.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.rpc;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.apache.hadoop.HadoopIllegalArgumentException;
 6 | import org.apache.hadoop.conf.Configuration;
 7 | import org.apache.hadoop.ipc.RPC;
 8 | import org.apache.hadoop.ipc.RPC.Builder;
 9 | import org.apache.hadoop.ipc.RPC.Server;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 | 
13 | public class HelloServer implements HelloProtocol {
14 | 
15 | 	private static final Logger LOGGER = LoggerFactory.getLogger(HelloServer.class);
16 | 	
17 | 	public String helloMethod(String name) {
18 | 		LOGGER.info(name);
19 | 		return "Hello " + name;
20 | 	}
21 | 	
22 | 	public static void main(String[] args) throws HadoopIllegalArgumentException, IOException {
23 | 		Configuration conf = new Configuration();
24 | 		Builder builder = new RPC.Builder(conf);
25 | 		String bindAddress = "node-01";
26 | 		int port = 8888;
27 | 		builder.setBindAddress(bindAddress)
28 | 			.setPort(8888)
29 | 			.setProtocol(HelloProtocol.class)
30 | 			.setInstance(new HelloServer());
31 | 		Server server = builder.build();
32 | 		LOGGER.info("Server start to listen on " + port);
33 | 		server.start();
34 | 	}
35 | 
36 | }
37 | 


--------------------------------------------------------------------------------
/rpc/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
 1 | ### direct log messages to stdout ###
 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
 3 | log4j.appender.stdout.Target = System.out
 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
 6 | 
 7 | ### direct messages to file test.log ###
 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
 9 | log4j.appender.file.File= ./log/hdfs.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 | 
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hdfs.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 | 
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 | 
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false  
28 | 
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 | 


--------------------------------------------------------------------------------
/rpc/src/test/java/com/xcompany/xproject/rpc/AppTest.java:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.rpc;
 2 | 
 3 | import junit.framework.Test;
 4 | import junit.framework.TestCase;
 5 | import junit.framework.TestSuite;
 6 | 
 7 | /**
 8 |  * Unit test for simple App.
 9 |  */
10 | public class AppTest 
11 |     extends TestCase
12 | {
13 |     /**
14 |      * Create the test case
15 |      *
16 |      * @param testName name of the test case
17 |      */
18 |     public AppTest( String testName )
19 |     {
20 |         super( testName );
21 |     }
22 | 
23 |     /**
24 |      * @return the suite of tests being tested
25 |      */
26 |     public static Test suite()
27 |     {
28 |         return new TestSuite( AppTest.class );
29 |     }
30 | 
31 |     /**
32 |      * Rigourous Test :-)
33 |      */
34 |     public void testApp()
35 |     {
36 |         assertTrue( true );
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/scala/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/scala">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/scala">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
16 | 		<attributes>
17 | 			<attribute name="maven.pomderived" value="true"/>
18 | 		</attributes>
19 | 	</classpathentry>
20 | 	<classpathentry kind="con" path="org.scala-ide.sdt.launching.SCALA_CONTAINER"/>
21 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
22 | 	<classpathentry kind="output" path="target/classes"/>
23 | </classpath>
24 | 


--------------------------------------------------------------------------------
/scala/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/scala/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>scala</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.scala-ide.sdt.core.scalabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
21 | 		<nature>org.scala-ide.sdt.core.scalanature</nature>
22 | 		<nature>org.eclipse.jdt.core.javanature</nature>
23 | 	</natures>
24 | </projectDescription>
25 | 


--------------------------------------------------------------------------------
/scala/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
 1 | eclipse.preferences.version=1
 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
 6 | org.eclipse.jdt.core.compiler.compliance=1.5
 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
13 | org.eclipse.jdt.core.compiler.source=1.5
14 | 


--------------------------------------------------------------------------------
/scala/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/scala/pom.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0"?>
  2 | <project
  3 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
  4 | 	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  5 | 	<modelVersion>4.0.0</modelVersion>
  6 | 
  7 | 	<parent>
  8 | 		<groupId>com.xcompany.xproject</groupId>
  9 | 		<artifactId>hadoop</artifactId>
 10 | 		<version>1.0.0-RELEASE</version>
 11 | 	</parent>
 12 | 
 13 | 	<artifactId>scala</artifactId>
 14 | 
 15 | 	<properties>
 16 | 		<!-- <spark.version>2.2.0</spark.version> -->
 17 | 		<scala.version>2.11.11</scala.version>
 18 | 		<!-- <spark.artifact>2.11</spark.artifact> -->
 19 | 	</properties>
 20 | 
 21 | 	<dependencies>
 22 | 		<!-- <dependency>
 23 | 			<groupId>org.scala-lang</groupId>
 24 | 			<artifactId>scala-library</artifactId>
 25 | 			<version>${scala.version}</version>
 26 | 		</dependency> -->
 27 | 		<dependency>
 28 | 			<groupId>junit</groupId>
 29 | 			<artifactId>junit</artifactId>
 30 | 			<version>4.13.1</version>
 31 | 			<scope>test</scope>
 32 | 		</dependency>
 33 | 		<dependency>
 34 | 			<groupId>org.specs</groupId>
 35 | 			<artifactId>specs</artifactId>
 36 | 			<version>1.2.5</version>
 37 | 			<scope>test</scope>
 38 | 		</dependency>
 39 | 
 40 | 
 41 | 		<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-network-common -->
 42 | 		<!-- <dependency>
 43 | 			<groupId>org.apache.spark</groupId>
 44 | 			<artifactId>spark-network-common_${spark.artifact}</artifactId>
 45 | 			<version>${spark.version}</version>
 46 | 		</dependency> -->
 47 | 		<!-- <dependency>
 48 | 			<groupId>org.apache.spark</groupId>
 49 | 			<artifactId>spark-core_${spark.artifact}</artifactId>
 50 | 			<version>${spark.version}</version>
 51 | 		</dependency>
 52 | 		<dependency>
 53 | 			<groupId>org.apache.spark</groupId>
 54 | 			<artifactId>spark-streaming_${spark.artifact}</artifactId>
 55 | 			<version>${spark.version}</version>
 56 | 		</dependency>
 57 | 		<dependency>
 58 | 			<groupId>org.apache.spark</groupId>
 59 | 			<artifactId>spark-sql_${spark.artifact}</artifactId>
 60 | 			<version>${spark.version}</version>
 61 | 		</dependency> -->
 62 | 		<!-- <dependency>
 63 | 			<groupId>org.apache.spark</groupId>
 64 | 			<artifactId>spark-hive_${spark.artifact}</artifactId>
 65 | 			<version>${spark.version}</version>
 66 | 		</dependency> -->
 67 | 		<!-- <dependency>
 68 | 			<groupId>org.apache.spark</groupId>
 69 | 			<artifactId>spark-mllib_${spark.artifact}</artifactId>
 70 | 			<version>${spark.version}</version>
 71 | 		</dependency> -->
 72 | 
 73 | 	</dependencies>
 74 | 
 75 | 	<repositories>
 76 | 		<repository>
 77 | 			<id>scala-tools.org</id>
 78 | 			<name>Scala-Tools Maven2 Repository</name>
 79 | 			<url>http://scala-tools.org/repo-releases</url>
 80 | 		</repository>
 81 | 	</repositories>
 82 | 	<pluginRepositories>
 83 | 		<pluginRepository>
 84 | 			<id>scala-tools.org</id>
 85 | 			<name>Scala-Tools Maven2 Repository</name>
 86 | 			<url>http://scala-tools.org/repo-releases</url>
 87 | 		</pluginRepository>
 88 | 	</pluginRepositories>
 89 | 	<build>
 90 | 		<sourceDirectory>src/main/scala</sourceDirectory>
 91 | 		<testSourceDirectory>src/test/scala</testSourceDirectory>
 92 | 		<plugins>
 93 | 			<plugin>
 94 | 				<groupId>org.scala-tools</groupId>
 95 | 				<artifactId>maven-scala-plugin</artifactId>
 96 | 				<!-- <executions> <execution> <goals> <goal>compile</goal> <goal>testCompile</goal> 
 97 | 					</goals> </execution> </executions> -->
 98 | 				<configuration>
 99 | 					<scalaVersion>${scala.version}</scalaVersion>
100 | 					<args>
101 | 						<arg>-target:jvm-1.8</arg>
102 | 					</args>
103 | 				</configuration>
104 | 			</plugin>
105 | 			<plugin>
106 | 				<artifactId>maven-eclipse-plugin</artifactId>
107 | 				<configuration>
108 | 					<downloadSources>true</downloadSources>
109 | 					<buildcommands>
110 | 						<buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
111 | 					</buildcommands>
112 | 					<additionalProjectnatures>
113 | 						<projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
114 | 					</additionalProjectnatures>
115 | 					<classpathContainers>
116 | 						<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
117 | 						<classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
118 | 					</classpathContainers>
119 | 				</configuration>
120 | 			</plugin>
121 | 		</plugins>
122 | 	</build>
123 | 	<reporting>
124 | 		<plugins>
125 | 			<plugin>
126 | 				<groupId>org.scala-tools</groupId>
127 | 				<artifactId>maven-scala-plugin</artifactId>
128 | 				<configuration>
129 | 					<scalaVersion>${scala.version}</scalaVersion>
130 | 				</configuration>
131 | 			</plugin>
132 | 		</plugins>
133 | 	</reporting>
134 | </project>
135 | 
136 | 


--------------------------------------------------------------------------------
/scala/src/main/scala/com/xcompany/xproject/scala/App.scala:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.scala
 2 | 
 3 | /**
 4 | * http://blog.csdn.net/wuyinxian/article/details/38727717
 5 | * http://download.scala-ide.org/sdk/helium/e38/scala211/stable/site
 6 | */
 7 | 
 8 | 
 9 | object App {
10 |     def main(args: Array[String]): Unit = {
11 |         println("Hello World!");
12 |     }
13 | }
14 | 
15 | 


--------------------------------------------------------------------------------
/spark-streaming.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark-streaming.zip


--------------------------------------------------------------------------------
/spark/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/scala">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/scala">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
16 | 		<attributes>
17 | 			<attribute name="maven.pomderived" value="true"/>
18 | 		</attributes>
19 | 	</classpathentry>
20 | 	<classpathentry kind="con" path="org.scala-ide.sdt.launching.SCALA_CONTAINER"/>
21 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
22 | 	<classpathentry kind="output" path="target/classes"/>
23 | </classpath>
24 | 


--------------------------------------------------------------------------------
/spark/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/spark/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>spark</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.scala-ide.sdt.core.scalabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.scala-ide.sdt.core.scalanature</nature>
21 | 		<nature>org.eclipse.jdt.core.javanature</nature>
22 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
23 | 	</natures>
24 | </projectDescription>
25 | 


--------------------------------------------------------------------------------
/spark/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 | 


--------------------------------------------------------------------------------
/spark/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427870000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427870000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427880000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427880000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427890000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427890000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427900000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427900000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427910000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427910000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427920000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427920000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427930000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427930000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427940000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427940000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427950000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427950000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427960000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427960000.crc


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427870000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427870000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427880000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427880000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427890000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427890000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427900000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427900000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427910000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427910000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427920000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427920000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427930000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427930000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427940000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427940000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427950000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427950000


--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427960000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427960000


--------------------------------------------------------------------------------
/spark/checkpoint/receivedBlockMetadata/log-1514427870017-1514427930017:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/receivedBlockMetadata/log-1514427870017-1514427930017


--------------------------------------------------------------------------------
/spark/checkpoint/receivedBlockMetadata/log-1514427932107-1514427992107:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/receivedBlockMetadata/log-1514427932107-1514427992107


--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/App.scala:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.spark
 2 | 
 3 | /**
 4 |  * Hello world!
 5 |  *
 6 |  */
 7 | object App {
 8 |     def main(args: Array[String]): Unit = {
 9 |         println("Hello World!");
10 |     }
11 | }
12 | 


--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/WordCount.scala:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.spark
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.SparkContext
 5 | import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
 6 | 
 7 | object WordCount {
 8 |     def main(args: Array[String]): Unit = {
 9 | //        println(args(0))
10 |         //        println("Hello World!")
11 |         val conf = new SparkConf().setAppName("WordCount")
12 | //                    .setMaster("spark://node-01:7077")
13 | //                    .setMaster("local")
14 | //                    .set("spark.executor.memory", "1g")
15 | //                    .set("spark.cores.max", "1")
16 |         val sc = new SparkContext(conf)
17 | 
18 |         val lines = sc.textFile("file:///home/xxproject/workspace/xxhadoop/spark_data/")
19 |         val words = lines.flatMap(line => line.split(" "))
20 |         val wordCounts = words.map(word => (word, 1)).reduceByKey((a, b) => a + b)
21 |         wordCounts.collect().foreach(println)
22 |         wordCounts.partitions.length
23 | //        wordCounts.saveAsTextFile("file:///tmp/output")
24 | 
25 |         sc.stop()
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/streaming/BroadcastWrapper.scala:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.spark.streaming
 2 | 
 3 | import scala.collection.mutable
 4 | 
 5 | import org.apache.spark.SparkContext
 6 | import org.apache.spark.broadcast.Broadcast
 7 | import java.io.ObjectInputStream
 8 | import java.io.ObjectOutputStream
 9 | import java.util.Calendar
10 | import java.text.SimpleDateFormat
11 | 
12 | //http://spark.apache.org/docs/2.2.0/streaming-programming-guide.html#accumulators-broadcast-variables-and-checkpoints
13 | object BroadcastWrapper {
14 |     @volatile private var instance: Broadcast[Map[String, String]] = null
15 |     private val map = mutable.LinkedHashMap[String, String]()
16 | 
17 |     def getUpdateInfo(): Map[String, String] = {
18 |         val jedis_driver = RedisClient.pool.getResource
19 |         println("=====GET_DRIVER")
20 |         val is_update = jedis_driver.lpop("is_update")
21 |         println("is_update: " + is_update)
22 |         println("=====READ_DRIVER")
23 | 
24 |         //        if (null == is_update) {
25 |         //            rdd.sparkContext.broadcast(is_update)
26 |         //        }
27 | 
28 |         jedis_driver.close()
29 |         println("=====CLOSE_DRIVER")
30 | 
31 |         map += ("is_update" -> is_update)
32 |         val broadcast_info = jedis_driver.get("broadcast_info")
33 |         map += ("broadcast_info" -> broadcast_info)
34 |         map.toMap
35 |     }
36 | 
37 |     def getInstance(sc: SparkContext): Broadcast[Map[String, String]] = {
38 |         if (instance == null) {
39 |             synchronized {
40 |                 if (instance == null) {
41 |                     val updateInfo = getUpdateInfo()
42 |                     // https://www.jianshu.com/p/95896d06a94d
43 |                     if (Some(null) != updateInfo.get("is_update")) {
44 |                         instance = sc.broadcast(updateInfo)
45 |                     }
46 |                 }
47 |             }
48 |         }
49 |         instance
50 |     }
51 | 
52 |     def broadcastInfo(sc: SparkContext, blocking: Boolean = false): Unit = {
53 |         val updateInfo = getUpdateInfo()
54 |         // https://www.jianshu.com/p/95896d06a94d
55 |         if (Some(null) != updateInfo.get("is_update")) {
56 |             if (instance != null) {
57 |                 instance.unpersist(blocking)
58 |             }
59 |             instance = sc.broadcast(updateInfo)
60 |         }
61 | 
62 |         //        val calendar = Calendar.getInstance()
63 |         //        val date = calendar.getTime()
64 |         //        val format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss")
65 |         //        val dateFormat = format.format(date)
66 |         //        println("=====broadcat success: " + dateFormat)
67 |     }
68 | 
69 |     //    private def writeObject(out: ObjectOutputStream): Unit = {
70 |     //        out.writeObject(instance)
71 |     //    }
72 |     //
73 |     //    private def readObject(in: ObjectInputStream): Unit = {
74 |     //        instance = in.readObject().asInstanceOf[Broadcast[Map[String, String]]]
75 |     //    }
76 | 
77 | }
78 | 
79 | 


--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/streaming/NetworkWordCount.scala:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.spark.streaming
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.streaming.Seconds
 5 | import org.apache.spark.streaming.StreamingContext
 6 | import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
 7 | 
 8 | object NetworkWordCount {
 9 |     def main(args: Array[String]): Unit = {
10 |         val conf = new SparkConf().setAppName("NetworkWordCount")
11 | //                    .setMaster("spark://node-01:7077")
12 | //                    .setMaster("local")
13 | //                    .set("spark.executor.memory", "1g")
14 | //                    .set("spark.cores.max", "1")
15 |         val ssc = new StreamingContext(conf, Seconds(10))
16 |         val lines = ssc.socketTextStream("node-01", 9999)
17 |         val words = lines.flatMap(_.split(" "))
18 |         val pairs = words.map(word => (word, 1))
19 |         val wordCounts = pairs.reduceByKey(_ + _)
20 |         wordCounts.print()
21 |         
22 |      
23 |         wordCounts.foreachRDD { rdd =>
24 |               rdd.foreachPartition { partitionOfRecords => {
25 | //                val connection = ConnectionPool.getConnection()
26 |                 partitionOfRecords.foreach(record => {
27 | //                    val sql = "insert into streaming_itemcount(item,count) values('" + record._1 + "'," + record._2 + ")"
28 | //                    val stmt = connection.createStatement
29 | //                    stmt.executeUpdate(sql)
30 |                        println(record)
31 |                 })
32 | //                ConnectionPool.returnConnection(connection)
33 |               }}
34 |         }
35 |         
36 |         ssc.start()             // Start the computation
37 |         ssc.awaitTermination()  // Wait for the computation to terminate
38 |     }
39 | }
40 | 
41 | 


--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/streaming/RedisClient.scala:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.spark.streaming
 2 | 
 3 | import org.apache.commons.pool2.impl.GenericObjectPoolConfig
 4 | import redis.clients.jedis.JedisPool
 5 | import redis.clients.jedis.JedisPoolConfig
 6 | 
 7 | //http://blog.csdn.net/qq_26525215/article/details/60466222
 8 | //https://segmentfault.com/a/1190000005085077
 9 | object RedisClient {
10 |     val host = "node-04"
11 |     val port = 63791
12 |     val timeout = 50000
13 |     val password = "123456"
14 |     val database = 0
15 |     val maxTotal = 10
16 |     val maxIdle = 5
17 |     val maxWaitMillis = timeout
18 |     val testOnBorrow = true
19 |     
20 |     val config = new JedisPoolConfig
21 |     config.setMaxTotal(maxTotal)
22 |     config.setMaxIdle(maxIdle)
23 |     config.setMaxWaitMillis(maxWaitMillis)
24 |     config.setTestOnBorrow(testOnBorrow)
25 |     
26 |     // must lazy
27 |     lazy val pool = new JedisPool(config, host, port, timeout, password, database)
28 | 
29 |     lazy val hook = new Thread {
30 |         override def run = {
31 |             println("Execute hook thread: " + this)
32 |             pool.destroy()
33 |         }
34 |     }
35 |     sys.addShutdownHook(hook.run)
36 | }
37 | 


--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/streaming/WaitForReady.scala:
--------------------------------------------------------------------------------
 1 | package com.xcompany.xproject.spark.streaming
 2 | 
 3 | object WaitForReady {
 4 |     private val PREFIX = "streaming"
 5 |     private val IS_READY = PREFIX + ":is_ready"
 6 |     
 7 |     def waitForReady(): Unit = {
 8 |         val jedis_main = RedisClient.pool.getResource
 9 |         var is_ready = jedis_main.get(IS_READY)
10 |             print(is_ready.toBoolean)
11 |         while (null == is_ready) {
12 |             println("is_ready: " + is_ready + ", continue waitFor...")
13 |             Thread.sleep(5000)
14 |             is_ready = jedis_main.get("is_ready")
15 |         }
16 |         jedis_main.close()
17 |         println("is_ready: " + is_ready + ", start to submitJob...")        
18 |     }
19 |     
20 |     def main(args: Array[String]): Unit = {
21 |       waitForReady()
22 |     }
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/spark/src/test/scala/com/xcompany/xproject/spark/AppTest.scala:
--------------------------------------------------------------------------------
 1 | //package com.xcompany.xproject.spark
 2 | //
 3 | //import org.junit._
 4 | //import Assert._
 5 | //
 6 | //@Test
 7 | //class AppTest {
 8 | //
 9 | //    @Test
10 | //    def testOK() = assertTrue(true)
11 | //
12 | ////    @Test
13 | ////    def testKO() = assertTrue(false)
14 | //
15 | //}
16 | //
17 | //
18 | 


--------------------------------------------------------------------------------
/spark/src/test/scala/com/xcompany/xproject/spark/MySpec.scala:
--------------------------------------------------------------------------------
 1 | //package com.xcompany.xproject.spark
 2 | //
 3 | //import org.specs._
 4 | //import org.specs.runner.{ConsoleRunner, JUnit4}
 5 | //
 6 | //class MySpecTest extends JUnit4(MySpec)
 7 | ////class MySpecSuite extends ScalaTestSuite(MySpec)
 8 | //object MySpecRunner extends ConsoleRunner(MySpec)
 9 | //
10 | //object MySpec extends Specification {
11 | //  "This wonderful system" should {
12 | //    "save the world" in {
13 | //      val list = Nil
14 | //      list must beEmpty
15 | //    }
16 | //  }
17 | //}
18 | 


--------------------------------------------------------------------------------
/spark_data/a.txt:
--------------------------------------------------------------------------------
1 | hello tom
2 | hello jerry
3 | hello tom
4 | 


--------------------------------------------------------------------------------
/spark_data/b.txt:
--------------------------------------------------------------------------------
1 | hello jerry
2 | hello jerry
3 | tom jerry
4 | 


--------------------------------------------------------------------------------
/spark_data/c.txt:
--------------------------------------------------------------------------------
1 | hello jerry
2 | hello tom
3 | 


--------------------------------------------------------------------------------
/sparkstreaming.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/sparkstreaming.zip


--------------------------------------------------------------------------------
/storm-kafka.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/storm-kafka.zip


--------------------------------------------------------------------------------
/storm.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/storm.zip


--------------------------------------------------------------------------------
/tensorflow/01-TemsorFlow的模块与API.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/01-TemsorFlow的模块与API.png


--------------------------------------------------------------------------------
/tensorflow/02-TensorFlow架构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/02-TensorFlow架构.png


--------------------------------------------------------------------------------
/tensorflow/02-TensorFlow核心基础知识.txt:
--------------------------------------------------------------------------------
 1 | 一、Tensor张量使用（可以使用constant、variable、placeholder进行赋值）
 2 | import tensorflow as tf
 3 | tf.constant("Hello, TensorFlow", dtype=tf.string)
 4 | tf.constant([1, 2, 3, 4, 5], dtype=tf.int32)
 5 | tf.Variable([[1, 2], [3, 4]], dtype=tf.int32)
 6 | tf.zeros(shape=(2, 3, 4), dtype=tf.int32)
 7 | tf.constant(0, shape=(2, 3, 4), dtype=tf.int32)
 8 | a = tf.constant(0, shape=(2, 3, 4), dtype=tf.int32)
 9 | tf.rank(a)
10 | 
11 | 二、Variable变量使用（2.X版本之前默认是懒加载，2.X之后是即时加载；
12 | 2.X版本之前使用Saver、之后使用Checkpoint进行变量的存储与恢复操作）
13 | import tensorflow as tf
14 | W = tf.Variable(tf.random.normal(shape=[1, 10], mean=0, stddev=1))
15 | b = tf.Variable(tf.zeros([10]))
16 | print([W, b])
17 | b.assign(b + tf.constant(1.0, shape=[10]))
18 | checkpoint = tf.train.Checkpoint(W=W, b=b)
19 | checkpoint.save('./demo/demo-model')
20 | b.assign(b + tf.constant(1.0, shape=[10]))
21 | print(W, b)
22 | checkpoint.restore('./demo/demo-model-1')
23 | print(W, b)
24 | 
25 | 
26 | import tensorflow as tf
27 | tf.compat.v1.disable_eager_execution()
28 | W = tf.Variable(tf.random.normal(shape=[1, 10], mean=0, stddev=1))
29 | b = tf.Variable(tf.zeros([10]))
30 | print([W, b])
31 | sess = tf.compat.v1.Session()
32 | sess.run(tf.compat.v1.global_variables_initializer())
33 | sess.run([W, b])
34 | sess.run(tf.compat.v1.assign(b, b + tf.constant(1.0, shape=[10])))
35 | saver = tf.compat.v1.train.Saver({'W': W, 'b': b})
36 | saver.save(sess, './demo/demo-model', global_step=0)
37 | sess.run(tf.compat.v1.assign(b, b + tf.constant(1.0, shape=[10])))
38 | saver.restore(sess, './demo/demo-model-0')
39 | sess.run(b)
40 | 
41 | 三、占位符与操作的使用（2.X版本已经移除了placeholder，使用placeholder定义占位符，使用feed填充占位符）
42 | import tensorflow as tf
43 | tf.compat.v1.disable_eager_execution()
44 | a = tf.constant(123)
45 | b = tf.constant(456)
46 | x = tf.compat.v1.placeholder(tf.int16, shape=())
47 | y = tf.compat.v1.placeholder(tf.int16, shape=())
48 | add = tf.add(x, y)
49 | mul = tf.multiply(x, y)
50 | sess = tf.compat.v1.Session()
51 | sess.run(add, feed_dict={x: 10, y: 5})
52 | sess.run(mul, feed_dict={x: 2, y: 3})
53 | 
54 | 四、查看设备列表
55 | import tensorflow as tf
56 | tf.config.list_physical_devices()
57 | 
58 | from tensorflow.python.client import device_lib
59 | dl = device_lib.list_local_devices()
60 | print(dl)
61 | 
62 | import tensorflow as tf
63 | print([tf.__version__, tf.test.is_gpu_available()])
64 | 


--------------------------------------------------------------------------------
/tensorflow/1-机器学习基础.ipynb:
--------------------------------------------------------------------------------
1 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       


--------------------------------------------------------------------------------
/tensorflow/10-模型定义与查看.ipynb:
--------------------------------------------------------------------------------
1 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         


--------------------------------------------------------------------------------
/tensorflow/4-数据存储DataStore访问.ipynb:
--------------------------------------------------------------------------------
1 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         


--------------------------------------------------------------------------------
/tensorflow/5-注册数据集Dataset.ipynb:
--------------------------------------------------------------------------------
1 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             


--------------------------------------------------------------------------------
/tensorflow/MLOps流水线参考.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/MLOps流水线参考.png


--------------------------------------------------------------------------------
/tensorflow/README:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tensorflow/env/README:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tensorflow/env/aml-demo-conda-dependencies.yaml:
--------------------------------------------------------------------------------
1 |                                                                                                                                                                                                                                                                            


--------------------------------------------------------------------------------
/tensorflow/requests_futures使用参考.txt:
--------------------------------------------------------------------------------
 1 | from concurrent.futures import ThreadPoolExecutor
 2 | from requests_futures.sessions import FuturesSession
 3 | from concurrent.futures import as_completed
 4 | 
 5 | 
 6 | session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))
 7 | 
 8 | futures=[session.post(f'http://httpbin.org/post', json={"name": "helloworld-" + str(i)}, headers={"Content-Type":"application/json"}) for i in range(3)]
 9 | 
10 | for future in as_completed(futures):
11 |     resp = future.result()
12 |     print(resp.text)
13 | 
14 | 


--------------------------------------------------------------------------------
/tensorflow/src/README:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tensorflow/src/job-dist.py:
--------------------------------------------------------------------------------
1 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  


--------------------------------------------------------------------------------
/tensorflow/src/job.py:
--------------------------------------------------------------------------------
1 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   


--------------------------------------------------------------------------------
/tensorflow/src/train.py:
--------------------------------------------------------------------------------
1 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    


--------------------------------------------------------------------------------
/tensorflow/性能测试.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/性能测试.png


--------------------------------------------------------------------------------
/我的书签.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/我的书签.rar


--------------------------------------------------------------------------------