├── .classpath
├── .project
├── .settings
├── .jsdtscope
├── org.eclipse.jdt.core.prefs
├── org.eclipse.m2e.core.prefs
├── org.eclipse.wst.common.component
├── org.eclipse.wst.common.project.facet.core.xml
├── org.eclipse.wst.jsdt.ui.superType.container
├── org.eclipse.wst.jsdt.ui.superType.name
└── org.eclipse.wst.validation.prefs
├── LICENSE
├── README.md
├── conf_data
├── HA高可用场景
│ └── HA部署.txt
├── HIVE安装.txt
├── HTTP_20130313143750.dat
├── ZK搭建步骤.txt
├── a.txt
├── b.txt
├── c.txt
├── flowsort-data
├── hadoop-env.sh
├── hbase安装配置
│ ├── backup-masters
│ ├── core-site.xml
│ ├── hbase shell.txt
│ ├── hbase-env.sh
│ ├── hbase-site.xml
│ ├── hbase集群搭建.txt
│ ├── hdfs-site.xml
│ ├── regionservers
│ └── 笔记.txt
├── hive HQL语法示例.txt
├── hive-default.xml.template
├── hive-site.xml
├── hive-udf.txt
├── hive.txt
├── hive安装-视频.txt
├── hive笔记.txt
├── kafka安装配置
│ ├── kafka安装
│ ├── kafka笔记.txt
│ ├── server-1.properties
│ ├── server-2.properties
│ └── server-3.properties
├── order.txt
├── spark安装部署.txt
├── spark运行命令样例.txt
├── storm安装配置
│ ├── storm-trainning-v1.0-zs.ppt
│ ├── storm.yaml
│ └── storm安装手册及笔记.txt
├── udf.txt
├── udt.test.txt
├── word-count.txt
├── zoo.cfg
├── zoo_sample.cfg
└── 非HA场景
│ ├── Hadoop搭建步骤-非HA场景.txt
│ ├── core-site.xml
│ ├── hdfs-site.xml
│ ├── mapred-site.xml
│ ├── masters
│ ├── slaves
│ └── yarn-site.xml
├── data_analyze.jpg
├── data_analyze.png
├── hadoop.jpg
├── hbase
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── xcompany
│ │ │ └── xproject
│ │ │ └── hbase
│ │ │ ├── App.java
│ │ │ └── HBaseTest.java
│ └── resources
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── com
│ └── xcompany
│ └── xproject
│ └── hbase
│ └── AppTest.java
├── hdfs
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── dependency-reduced-pom.xml
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── xcompany
│ │ │ └── xproject
│ │ │ └── hdfs
│ │ │ └── App.java
│ └── resources
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── com
│ └── xcompany
│ └── xproject
│ └── hdfs
│ ├── AppTest.java
│ └── HDFSTest.java
├── hive
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── dependency-reduced-pom.xml
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── xcompany
│ │ │ └── xproject
│ │ │ └── hive
│ │ │ └── Phone2Area.java
│ └── resources
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── com
│ └── xcompany
│ └── xproject
│ └── hive
│ └── AppTest.java
├── kafka
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── pom.xml
└── src
│ └── main
│ ├── java
│ └── com
│ │ └── xcompany
│ │ └── xproject
│ │ └── kafka
│ │ ├── TestConsumer.java
│ │ └── TestProducer.java
│ └── resources
│ └── log4j.properties
├── mmdetection
├── 1-mmdection安装使用记录.txt
├── 2-mmdection预测新数据.txt
├── 3-mmdection模型指标测试.txt
└── README
├── mr
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── dependency-reduced-pom.xml
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── xcompany
│ │ │ └── xproject
│ │ │ └── mr
│ │ │ ├── App.java
│ │ │ ├── flowpartition
│ │ │ ├── FlowBean.java
│ │ │ ├── FlowPartition.java
│ │ │ ├── FlowPartitionJob.java
│ │ │ ├── FlowPartitionMapper.java
│ │ │ └── FlowPartitionReducer.java
│ │ │ ├── flowsort
│ │ │ ├── FlowBean.java
│ │ │ ├── FlowSortJob.java
│ │ │ ├── FlowSortMapper.java
│ │ │ └── FlowSortReducer.java
│ │ │ ├── flowsum
│ │ │ ├── FlowBean.java
│ │ │ ├── FlowSumJob.java
│ │ │ ├── FlowSumMapper.java
│ │ │ └── FlowSumReducer.java
│ │ │ ├── invertedindex
│ │ │ ├── StepOneJob.java
│ │ │ ├── StepOneMapper.java
│ │ │ ├── StepOneReducer.java
│ │ │ ├── StepTwoJob.java
│ │ │ ├── StepTwoMapper.java
│ │ │ └── StepTwoReducer.java
│ │ │ └── wordcount
│ │ │ ├── WordCountJob.java
│ │ │ ├── WordCountMapper.java
│ │ │ └── WordCountReducer.java
│ └── resources
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── com
│ └── xcompany
│ └── xproject
│ └── mr
│ └── AppTest.java
├── pom.xml
├── rpc
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── .jsdtscope
│ ├── org.eclipse.jdt.core.prefs
│ ├── org.eclipse.m2e.core.prefs
│ ├── org.eclipse.wst.common.component
│ ├── org.eclipse.wst.common.project.facet.core.prefs.xml
│ ├── org.eclipse.wst.common.project.facet.core.xml
│ ├── org.eclipse.wst.jsdt.ui.superType.container
│ ├── org.eclipse.wst.jsdt.ui.superType.name
│ └── org.eclipse.wst.validation.prefs
├── dependency-reduced-pom.xml
├── log
│ └── hdfs.log
├── logs
│ ├── hdfs.log
│ └── hdfs.log.2017-11-06
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── xcompany
│ │ │ └── xproject
│ │ │ └── rpc
│ │ │ ├── App.java
│ │ │ ├── HelloClient.java
│ │ │ ├── HelloProtocol.java
│ │ │ └── HelloServer.java
│ └── resources
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── com
│ └── xcompany
│ └── xproject
│ └── rpc
│ └── AppTest.java
├── scala
├── .cache
├── .cache-main
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.jdt.core.prefs
│ ├── org.eclipse.m2e.core.prefs
│ └── org.scala-ide.sdt.core.prefs
├── pom.xml
└── src
│ └── main
│ └── scala
│ └── com
│ └── xcompany
│ └── xproject
│ └── scala
│ └── App.scala
├── spark-streaming.zip
├── spark
├── .cache-main
├── .cache-tests
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── checkpoint
│ ├── .checkpoint-1514427870000.crc
│ ├── .checkpoint-1514427880000.crc
│ ├── .checkpoint-1514427890000.crc
│ ├── .checkpoint-1514427900000.crc
│ ├── .checkpoint-1514427910000.crc
│ ├── .checkpoint-1514427920000.crc
│ ├── .checkpoint-1514427930000.crc
│ ├── .checkpoint-1514427940000.crc
│ ├── .checkpoint-1514427950000.crc
│ ├── .checkpoint-1514427960000.crc
│ ├── checkpoint-1514427870000
│ ├── checkpoint-1514427880000
│ ├── checkpoint-1514427890000
│ ├── checkpoint-1514427900000
│ ├── checkpoint-1514427910000
│ ├── checkpoint-1514427920000
│ ├── checkpoint-1514427930000
│ ├── checkpoint-1514427940000
│ ├── checkpoint-1514427950000
│ ├── checkpoint-1514427960000
│ └── receivedBlockMetadata
│ │ ├── log-1514427870017-1514427930017
│ │ └── log-1514427932107-1514427992107
├── pom.xml
└── src
│ ├── main
│ └── scala
│ │ └── com
│ │ └── xcompany
│ │ └── xproject
│ │ └── spark
│ │ ├── App.scala
│ │ ├── WordCount.scala
│ │ └── streaming
│ │ ├── BroadcastWrapper.scala
│ │ ├── KafkaWordCount.scala
│ │ ├── NetworkWordCount.scala
│ │ ├── RedisClient.scala
│ │ └── WaitForReady.scala
│ └── test
│ └── scala
│ └── com
│ └── xcompany
│ └── xproject
│ └── spark
│ ├── AppTest.scala
│ └── MySpec.scala
├── spark_data
├── a.txt
├── b.txt
└── c.txt
├── sparkstreaming.zip
├── storm-kafka.zip
├── storm.zip
├── tensorflow
├── 01-TemsorFlow的模块与API.png
├── 01-TensorFlow基本概念与HelloWorld.txt
├── 02-TensorFlow架构.png
├── 02-TensorFlow核心基础知识.txt
├── 03-MNIST手写体数据集训练.txt
├── 1-机器学习基础.ipynb
├── 10-模型定义与查看.ipynb
├── 11-AML本地交互式训练.ipynb
├── 12-AML远程单节点训练.ipynb
├── 13-AML远程分布式训练.ipynb
├── 2-MNIST手写体数据集.ipynb
├── 3-工作区workspace访问.ipynb
├── 4-数据存储DataStore访问.ipynb
├── 5-注册数据集Dataset.ipynb
├── 6-标记数据集Dataset.ipynb
├── 7-加载数据集Dataset.ipynb
├── 8-规范化数据集Dataset.ipynb
├── 9-统计分析数据集Dataset.ipynb
├── MLOps流水线参考.png
├── README
├── env
│ ├── README
│ └── aml-demo-conda-dependencies.yaml
├── requests_futures使用参考.txt
├── src
│ ├── README
│ ├── job-dist.py
│ ├── job.py
│ ├── train-dist.py
│ └── train.py
└── 性能测试.png
└── 我的书签.rar
/.classpath:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
    <name>hadoop</name>
    <comment></comment>
    <projects>
    </projects>
    <buildSpec>
        <buildCommand>
            <name>org.eclipse.wst.jsdt.core.javascriptValidator</name>
            <arguments>
            </arguments>
        </buildCommand>
        <buildCommand>
            <name>org.eclipse.jdt.core.javabuilder</name>
            <arguments>
            </arguments>
        </buildCommand>
        <buildCommand>
            <name>org.eclipse.wst.common.project.facet.core.builder</name>
            <arguments>
            </arguments>
        </buildCommand>
        <buildCommand>
            <name>org.eclipse.m2e.core.maven2Builder</name>
            <arguments>
            </arguments>
        </buildCommand>
        <buildCommand>
            <name>org.eclipse.wst.validation.validationbuilder</name>
            <arguments>
            </arguments>
        </buildCommand>
    </buildSpec>
    <natures>
        <nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
        <nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
        <nature>org.eclipse.jdt.core.javanature</nature>
        <nature>org.eclipse.m2e.core.maven2Nature</nature>
        <nature>org.eclipse.wst.common.project.facet.core.nature</nature>
        <nature>org.eclipse.wst.jsdt.core.jsNature</nature>
    </natures>
</projectDescription>
--------------------------------------------------------------------------------
/.settings/.jsdtscope:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.8
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
13 | org.eclipse.jdt.core.compiler.source=1.8
14 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.common.component:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.common.project.facet.core.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.jsdt.ui.superType.container:
--------------------------------------------------------------------------------
1 | org.eclipse.wst.jsdt.launching.baseBrowserLibrary
--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.jsdt.ui.superType.name:
--------------------------------------------------------------------------------
1 | Window
--------------------------------------------------------------------------------
/.settings/org.eclipse.wst.validation.prefs:
--------------------------------------------------------------------------------
1 | disabled=06target
2 | eclipse.preferences.version=1
3 |
--------------------------------------------------------------------------------
/conf_data/HA高可用场景/HA部署.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/HA高可用场景/HA部署.txt
--------------------------------------------------------------------------------
/conf_data/HTTP_20130313143750.dat:
--------------------------------------------------------------------------------
1 | 1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200
2 | 1363157995052 13826544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 4 0 264 0 200
3 | 1363157991076 13926435656 20-10-7A-28-CC-0A:CMCC 120.196.100.99 2 4 132 1512 200
4 | 1363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 4 0 240 0 200
5 | 1363157993044 18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 iface.qiyi.com 视频网站 15 12 1527 2106 200
6 | 1363157995074 84138413 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 122.72.52.12 20 16 4116 1432 200
7 | 1363157993055 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 18 15 1116 954 200
8 | 1363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 sug.so.xxx.cn 信息安全 20 20 3156 2936 200
9 | 1363157983019 13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 4 0 240 0 200
10 | 1363157984041 13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 s19.cnzz.com 站点统计 24 9 6960 690 200
11 | 1363157973098 15013685858 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 rank.ie.sogou.com 搜索引擎 28 27 3659 3538 200
12 | 1363157986029 15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 www.umeng.com 站点统计 3 3 1938 180 200
13 | 1363157992093 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 15 9 918 4938 200
14 | 1363157986041 13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 3 3 180 180 200
15 | 1363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 2052.flash2-http.qq.com 综合门户 15 12 1938 2910 200
16 | 1363157995093 13922314466 00-FD-07-A2-EC-BA:CMCC 120.196.100.82 img.qfc.cn 12 12 3008 3720 200
17 | 1363157982040 13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 y0.ifengimg.com 综合门户 57 102 7335 110349 200
18 | 1363157986072 18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 input.shouji.sogou.com 搜索引擎 21 18 9531 2412 200
19 | 1363157990043 13925057413 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 t3.baidu.com 搜索引擎 69 63 11058 48243 200
20 | 1363157988072 13760778710 00-FD-07-A4-7B-08:CMCC 120.196.100.82 2 2 120 120 200
21 | 1363157985079 13823070001 20-7C-8F-70-68-1F:CMCC 120.196.100.99 6 3 360 180 200
22 | 1363157985069 13600217502 00-1F-64-E2-E8-B1:CMCC 120.196.100.55 18 138 1080 186852 200
23 |
--------------------------------------------------------------------------------
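Judging by conf_data/flowsort-data, the fields that the mr/flowsum and mr/flowsort jobs care about in records like the ones above are the phone number (second field) and the upstream/downstream byte counts (third- and second-to-last fields). The repo's FlowBean/FlowSumMapper/FlowSumReducer sources are not included in this part of the export, so the following is only a minimal sketch of how such a pair could parse these records with the Hadoop 2.7.x MapReduce API; class and method names are illustrative.

```
// Sketch only: the repo's flowsum package uses a custom FlowBean Writable;
// here the sums are emitted as a tab-separated Text value to keep the example short.
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowSumSketch {

    public static class FlowMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\\s+");
            if (fields.length < 4) {
                return; // skip malformed records
            }
            String phone = fields[1];                              // e.g. 13726230503
            long up = Long.parseLong(fields[fields.length - 3]);   // upstream bytes
            long down = Long.parseLong(fields[fields.length - 2]); // downstream bytes
            context.write(new Text(phone), new Text(up + "\t" + down));
        }
    }

    public static class FlowReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text phone, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long upSum = 0, downSum = 0;
            for (Text v : values) {
                String[] parts = v.toString().split("\t");
                upSum += Long.parseLong(parts[0]);
                downSum += Long.parseLong(parts[1]);
            }
            // phone -> up, down, total (matches the layout of conf_data/flowsort-data)
            context.write(phone, new Text(upSum + "\t" + downSum + "\t" + (upSum + downSum)));
        }
    }
}
```
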
/conf_data/ZK搭建步骤.txt:
--------------------------------------------------------------------------------
1 | Version selection: http://blog.csdn.net/anningzhu/article/details/60468723
2 | http://hbase.apache.org/book.html#zookeeper.requirements
3 | Chosen version: zookeeper-3.4.9.tar.gz
4 |
5 | /home/xxproject/lib
6 | tar -xzvf zookeeper-3.4.9.tar.gz
7 | ln -sf zookeeper-3.4.9 zookeeper
8 | mkdir -p /home/xxproject/data/zookeeper
9 | Copy the zoo.cfg configuration into /home/xxproject/lib/zookeeper-3.4.9/conf
10 |
11 | echo '
12 | # !!!No Modification, This Section is Auto Generated by ZooKeeper
13 | export ZK_HOME=/home/xxproject/lib/zookeeper
14 | export PATH=${PATH}:${ZK_HOME}/bin
15 | ' >> ~/.bash_profile
16 | source ~/.bash_profile
17 |
18 | Run one of the following on each of the three machines:
19 | echo 1 > /home/xxproject/data/zookeeper/myid
20 | echo 2 > /home/xxproject/data/zookeeper/myid
21 | echo 3 > /home/xxproject/data/zookeeper/myid
22 |
23 | # Start ZooKeeper
24 | #./zookeeper/bin/zkServer.sh start
25 | #./zookeeper/bin/zkServer.sh status
26 | # bin/zkCli.sh -server 127.0.0.1:2181
27 | zkServer.sh start
28 | zkServer.sh status
29 | # jps QuorumPeerMain
30 |
--------------------------------------------------------------------------------
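Once each server reports leader/follower via zkServer.sh status, a quick way to confirm the ensemble is reachable from application code is a small client check. A minimal sketch, assuming the zookeeper 3.4.x client jar is on the classpath; the class name is made up:

```
import java.util.List;
import java.util.concurrent.CountDownLatch;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;

public class ZkCheck {
    public static void main(String[] args) throws Exception {
        CountDownLatch connected = new CountDownLatch(1);
        // Connect string matches the server.1/2/3 entries in conf_data/zoo.cfg
        ZooKeeper zk = new ZooKeeper("node-01:2181,node-02:2181,node-03:2181", 3000, event -> {
            if (event.getState() == Watcher.Event.KeeperState.SyncConnected) {
                connected.countDown();
            }
        });
        connected.await();                                   // wait for the session to be established
        List<String> children = zk.getChildren("/", false);
        System.out.println("znodes under /: " + children);   // e.g. [zookeeper]
        zk.close();
    }
}
```
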
/conf_data/a.txt:
--------------------------------------------------------------------------------
1 | hello tom
2 | hello jerry
3 | hello tom
4 |
--------------------------------------------------------------------------------
/conf_data/b.txt:
--------------------------------------------------------------------------------
1 | hello jerry
2 | hello jerry
3 | tom jerry
4 |
--------------------------------------------------------------------------------
/conf_data/c.txt:
--------------------------------------------------------------------------------
1 | hello jerry
2 | hello tom
3 |
--------------------------------------------------------------------------------
/conf_data/flowsort-data:
--------------------------------------------------------------------------------
1 | 13480253104 180 180 360
2 | 13502468823 7335 110349 117684
3 | 13560439658 2034 5892 7926
4 | 13600217502 1080 186852 187932
5 | 13602846565 1938 2910 4848
6 | 13660577991 6960 690 7650
7 | 13719199419 240 0 240
8 | 13726230503 2481 24681 27162
9 | 13760778710 120 120 240
10 | 13823070001 360 180 540
11 | 13826544101 264 0 264
12 | 13922314466 3008 3720 6728
13 | 13925057413 11058 48243 59301
14 | 13926251106 240 0 240
15 | 13926435656 132 1512 1644
16 | 15013685858 3659 3538 7197
17 | 15920133257 3156 2936 6092
18 | 15989002119 1938 180 2118
19 | 18211575961 1527 2106 3633
20 | 18320173382 9531 2412 11943
21 | 84138413 4116 1432 5548
22 |
--------------------------------------------------------------------------------
/conf_data/hadoop-env.sh:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Set Hadoop-specific environment variables here.
18 |
19 | # The only required environment variable is JAVA_HOME. All others are
20 | # optional. When running a distributed configuration it is best to
21 | # set JAVA_HOME in this file, so that it is correctly defined on
22 | # remote nodes.
23 |
24 | # The java implementation to use.
25 | JAVA_HOME=/home/xxproject/lib/jdk
26 | export JAVA_HOME=${JAVA_HOME}
27 |
28 | # The jsvc implementation to use. Jsvc is required to run secure datanodes
29 | # that bind to privileged ports to provide authentication of data transfer
30 | # protocol. Jsvc is not required if SASL is configured for authentication of
31 | # data transfer protocol using non-privileged ports.
32 | #export JSVC_HOME=${JSVC_HOME}
33 |
34 | export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
35 |
36 | # Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
37 | for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
38 | if [ "$HADOOP_CLASSPATH" ]; then
39 | export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
40 | else
41 | export HADOOP_CLASSPATH=$f
42 | fi
43 | done
44 |
45 | # The maximum amount of heap to use, in MB. Default is 1000.
46 | #export HADOOP_HEAPSIZE=
47 | #export HADOOP_NAMENODE_INIT_HEAPSIZE=""
48 |
49 | # Extra Java runtime options. Empty by default.
50 | export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
51 |
52 | # Command specific options appended to HADOOP_OPTS when specified
53 | export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
54 | export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
55 |
56 | export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
57 |
58 | export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
59 | export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
60 |
61 | # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
62 | export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
63 | #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
64 |
65 | # On secure datanodes, user to run the datanode as after dropping privileges.
66 | # This **MUST** be uncommented to enable secure HDFS if using privileged ports
67 | # to provide authentication of data transfer protocol. This **MUST NOT** be
68 | # defined if SASL is configured for authentication of data transfer protocol
69 | # using non-privileged ports.
70 | export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
71 |
72 | # Where log files are stored. $HADOOP_HOME/logs by default.
73 | #export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
74 |
75 | # Where log files are stored in the secure data environment.
76 | export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
77 |
78 | ###
79 | # HDFS Mover specific parameters
80 | ###
81 | # Specify the JVM options to be used when starting the HDFS Mover.
82 | # These options will be appended to the options specified as HADOOP_OPTS
83 | # and therefore may override any similar flags set in HADOOP_OPTS
84 | #
85 | # export HADOOP_MOVER_OPTS=""
86 |
87 | ###
88 | # Advanced Users Only!
89 | ###
90 |
91 | # The directory where pid files are stored. /tmp by default.
92 | # NOTE: this should be set to a directory that can only be written to by
93 | # the user that will run the hadoop daemons. Otherwise there is the
94 | # potential for a symlink attack.
95 | export HADOOP_PID_DIR=${HADOOP_PID_DIR}
96 | export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
97 |
98 | # A string representing this instance of hadoop. $USER by default.
99 | export HADOOP_IDENT_STRING=$USER
100 |
--------------------------------------------------------------------------------
/conf_data/hbase安装配置/backup-masters:
--------------------------------------------------------------------------------
1 | node-03
2 |
--------------------------------------------------------------------------------
/conf_data/hbase安装配置/core-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- License header omitted in this export -->
<configuration>

    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node-01:9000</value>
        <description>The name of the default file system.</description>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/xxproject/data/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>

</configuration>
--------------------------------------------------------------------------------
/conf_data/hbase安装配置/hbase shell.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hbase安装配置/hbase shell.txt
--------------------------------------------------------------------------------
/conf_data/hbase安装配置/hbase-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- License header omitted in this export -->
<configuration>

    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://node-01:9000/hbase</value>
    </property>

    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>

    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>node-01:2181,node-02:2181,node-03:2181</value>
    </property>

</configuration>
--------------------------------------------------------------------------------
/conf_data/hbase安装配置/hbase集群搭建.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hbase安装配置/hbase集群搭建.txt
--------------------------------------------------------------------------------
/conf_data/hbase安装配置/hdfs-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- License header omitted in this export -->
<configuration>

    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>node-02:50090</value>
        <description>The secondary namenode http server address and port.</description>
    </property>
    <property>
        <name>dfs.namenode.secondary.https-address</name>
        <value>node-02:50091</value>
        <description>The secondary namenode HTTPS server address and port.</description>
    </property>

    <property>
        <name>dfs.namenode.http-address</name>
        <value>node-01:50070</value>
        <description>The address and the base port where the dfs namenode web ui will listen on.</description>
    </property>

    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>

</configuration>
--------------------------------------------------------------------------------
/conf_data/hbase安装配置/regionservers:
--------------------------------------------------------------------------------
1 | node-02
2 | node-03
3 | node-04
4 |
--------------------------------------------------------------------------------
/conf_data/hbase安装配置/笔记.txt:
--------------------------------------------------------------------------------
1 | cd /home/xxproject/lib
2 | tar -xzvf hbase-1.2.6-bin.tar.gz
3 | ln -sf hbase-1.2.6 hbase
4 |
5 | Add the following to hbase-env.sh:
6 | export JAVA_HOME=/home/xxproject/lib/jdk
7 | export HBASE_MANAGES_ZK=false
8 |
9 | Copy HDFS's core-site.xml and hdfs-site.xml into HBase's configuration directory --------- since this Hadoop setup does not use nameservices (HA), this should not actually be necessary
10 |
11 | Edit hbase-site.xml as follows:
12 | <configuration>
13 |   <property>
14 |     <name>hbase.rootdir</name>
15 |     <value>hdfs://node-01:9000/hbase</value>
16 |   </property>
17 |
18 |   <property>
19 |     <name>hbase.cluster.distributed</name>
20 |     <value>true</value>
21 |   </property>
22 |
23 |   <property>
24 |     <name>hbase.zookeeper.quorum</name>
25 |     <value>node-01:2181,node-02:2181,node-03:2181</value>
26 |   </property>
27 | </configuration>
28 | echo '
29 | # !!!No Modification, This Section is Auto Generated by ZooKeeper
30 | export HBASE_HOME=/home/xxproject/lib/hbase
31 | export PATH=${PATH}:${HBASE_HOME}/bin
32 | ' >> ~/.bash_profile
33 | source ~/.bash_profile
34 |
35 | hmaster--regionserver--hmaster-backup
36 | On the HMaster node, run:
37 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start master/hbase-daemon.sh start master
38 | On every regionserver node, run:
39 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start regionserver
40 | On the backup HMaster (HMaster-BackUp), also run:
41 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start master
42 |
43 | This step is disabled for now -- there are problems with it:
44 | ```5. Start all of HBase
45 | Start ZooKeeper on each node
46 | ./zkServer.sh start
47 | Start the HBase cluster
48 | start-dfs.sh
49 | Start HBase; on the master node run:
50 | start-hbase.sh
51 | 6. Access the HBase admin page through a browser
52 | 192.168.1.201:60010
53 | 7. To keep the cluster reliable, start more than one HMaster
54 | hbase-daemon.sh start master
55 | ```
56 |
57 | Quick test:
58 | http://10.20.0.12:16010/master-status
59 | http://10.20.0.12:16030/rs-status
60 | You can kill the master process to see whether the backup node is promoted to master
61 |
62 | Try it out in the hbase shell:
63 | create 'mygirls', {NAME => 'base_info', VERSIONS => 3}, {NAME => 'extra_info'}
64 | describe 'mygirls'
65 | put 'mygirls', '0001', 'base_info:name', 'fengjie'
66 | put 'mygirls', '0001', 'base_info:age', '28'
67 | put 'mygirls', '0001', 'base_info:gender', 'female'
68 | put 'mygirls', '0001', 'extra_info:boyfriend', 'xiaoming'
69 | get 'mygirls', '0001'
70 | get 'mygirls', '0001', 'base_info'
71 | get 'mygirls', '0001', 'extra_info:boyfriend'
72 |
73 | put 'mygirls', '0001', 'base_info:name', 'fengbaobao'
74 | get 'mygirls', '0001', {COLUMN => 'base_info', VERSIONS => 3}
75 |
76 | get returns only one row at a time; use scan to return multiple rows
77 | scan 'mygirls', {COLUMNS => ['base_info'], LIMIT => 10, STARTROW => '0001', VERSIONS => 10}
78 |
79 |
80 |
81 |
--------------------------------------------------------------------------------
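The same put/get calls can be issued from Java with the hbase-client 1.2.6 dependency declared in hbase/pom.xml. The repo's own HBaseTest.java is not shown in this export, so the sketch below only mirrors the shell session above; the class name is illustrative:

```
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class MyGirlsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node-01,node-02,node-03");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("mygirls"))) {

            // put 'mygirls', '0001', 'base_info:name', 'fengjie'
            Put put = new Put(Bytes.toBytes("0001"));
            put.addColumn(Bytes.toBytes("base_info"), Bytes.toBytes("name"), Bytes.toBytes("fengjie"));
            table.put(put);

            // get 'mygirls', '0001', 'base_info'
            Get get = new Get(Bytes.toBytes("0001"));
            get.addFamily(Bytes.toBytes("base_info"));
            Result result = table.get(get);
            System.out.println(Bytes.toString(
                    result.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("name"))));
        }
    }
}
```
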
/conf_data/hive HQL语法示例.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive HQL语法示例.txt
--------------------------------------------------------------------------------
/conf_data/hive-udf.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive-udf.txt
--------------------------------------------------------------------------------
/conf_data/hive.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive.txt
--------------------------------------------------------------------------------
/conf_data/hive安装-视频.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive安装-视频.txt
--------------------------------------------------------------------------------
/conf_data/hive笔记.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive笔记.txt
--------------------------------------------------------------------------------
/conf_data/kafka安装配置/kafka安装:
--------------------------------------------------------------------------------
1 | Kafka notes
2 |
3 |
4 | Cluster installation
5 | 1. Unpack the archive
6 | 2. Edit server.properties
7 | broker.id=1
8 | zookeeper.connect=weekend05:2181,weekend06:2181,weekend07:2181
9 |
10 | 3. Start the ZooKeeper cluster
11 |
12 | 4. Start a broker on every node
13 | bin/kafka-server-start.sh config/server.properties
14 |
15 | 5. Create a topic in the Kafka cluster
16 | bin/kafka-topics.sh --create --zookeeper weekend05:2181 --replication-factor 3 --partitions 1 --topic order
17 |
18 | 6. Use a producer to write messages to a topic
19 | bin/kafka-console-producer.sh --broker-list weekend:9092 --topic order
20 |
21 | 7. Use a consumer to read messages from a topic
22 | bin/kafka-console-consumer.sh --zookeeper weekend05:2181 --from-beginning --topic order
23 |
24 | 8. View the partition and replica status of a topic
25 | bin/kafka-topics.sh --describe --zookeeper weekend05:2181 --topic order
--------------------------------------------------------------------------------
/conf_data/kafka安装配置/kafka笔记.txt:
--------------------------------------------------------------------------------
1 | Kafka notes
2 | 1. Kafka is a distributed message caching system
3 | 2. The servers in a Kafka cluster are called brokers
4 | 3. Kafka has two kinds of clients: producers (message producers) and consumers (message consumers); clients connect to the brokers over TCP
5 | 4. Messages from different business systems are kept apart by topic, and every topic is partitioned to spread the read/write load
6 | 5. Each partition can have multiple replicas, to protect against data loss
7 | 6. Any update to a partition's data must go through the leader among that partition's replicas
8 | 7. Consumers can be grouped; for example, with two consumer groups A and B jointly consuming the topic order_info, the messages consumed by A and B do not overlap
9 | For instance, if order_info holds 100 messages with ids numbered 0-99 and group A consumes 0-49, then group B consumes 50-99
10 | 8. When consuming a topic, a consumer can specify the starting offset
11 |
12 |
13 |
14 |
15 | Cluster installation
16 | 1. Unpack
17 | cd /home/xxproject/lib
18 | tar -xzvf kafka_2.11-0.11.0.1.tgz
19 | ln -sf kafka_2.11-0.11.0.1 kafka
20 |
21 |
22 | Update the environment variables
23 | echo '
24 | # !!!No Modification, This Section is Auto Generated by ZooKeeper
25 | export KAFKA_HOME=/home/xxproject/lib/kafka
26 | export PATH=${PATH}:${KAFKA_HOME}/bin
27 | ' >> ~/.bash_profile
28 | source ~/.bash_profile
29 |
30 |
31 | 2. Edit server.properties
32 | ===================================================================
33 | broker.id=1/2/3
34 | zookeeper.connect=node-01:2181,node-02:2181,node-03:2181
35 | ===================================================================
36 |
37 | 3. Start the ZooKeeper cluster
38 |
39 | 4. Start a broker on every node: on node-02/3/4 start one each, pointing at a different configuration file
40 | # bin/kafka-server-start.sh config/server-1.properties
41 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-1.properties
42 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-2.properties
43 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-3.properties
44 |
45 |
46 | 5. Create a topic in the Kafka cluster
47 | # bin/kafka-topics.sh --create --zookeeper weekend05:2181 --replication-factor 3 --partitions 1 --topic order
48 | kafka-topics.sh --create --zookeeper 'node-01:2181,node-02:2181,node-03:2181' --replication-factor 3 --partitions 1 --topic order-r
49 | kafka-topics.sh --list --zookeeper 'node-01:2181,node-02:2181,node-03:2181'
50 | kafka-topics.sh --describe --zookeeper 'node-01:2181,node-02:2181,node-03:2181'
51 |
52 | 6. Use a producer to write messages to a topic
53 | # bin/kafka-console-producer.sh --broker-list weekend:9092 --topic order
54 | kafka-console-producer.sh --broker-list node-02:9092 --topic order
55 | >>> This is a message
56 | >>> This is another message
57 |
58 | kafka-console-producer.sh --broker-list node-02:9092,node-03:9092,node-04:9092 --topic order-r
59 | >>> This is a message
60 | >>> This is another message
61 |
62 | 7. Use a consumer to read messages from a topic
63 | # bin/kafka-console-consumer.sh --zookeeper weekend05:2181 --from-beginning --topic order
64 | kafka-console-consumer.sh --bootstrap-server node-02:9092 --topic order --from-beginning
65 | kafka-console-consumer.sh --bootstrap-server node-02:9092,node-03:9092,node-04:9092 --topic order-r --from-beginning
66 |
67 | 8. View the partition and replica status of a topic
68 | # bin/kafka-topics.sh --describe --zookeeper weekend05:2181 --topic order
69 |
70 |
71 |
--------------------------------------------------------------------------------
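The console producer/consumer commands above map directly onto the kafka-clients 0.11 Java API. The repo's TestProducer.java/TestConsumer.java are not included in this part of the export, so this is only a sketch with illustrative names:

```
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class OrderTopicSketch {

    static void produce() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node-02:9092,node-03:9092,node-04:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        producer.send(new ProducerRecord<>("order", "This is a message"));
        producer.close();
    }

    static void consume() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "node-02:9092,node-03:9092,node-04:9092");
        props.put("group.id", "order-group");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset", "earliest");   // same effect as --from-beginning
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Collections.singletonList("order"));
        for (int i = 0; i < 10; i++) {
            ConsumerRecords<String, String> records = consumer.poll(1000);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.offset() + ": " + record.value());
            }
        }
        consumer.close();
    }

    public static void main(String[] args) {
        produce();
        consume();
    }
}
```
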
/conf_data/order.txt:
--------------------------------------------------------------------------------
1 | 0000101 iphone6plus 64G 6888
2 | 0000102 xiaominote 64G 2388
3 | 0000103 iphone5 64G 6888
4 | 0000104 xiaomi5 64G 2388
5 | 0000105 huawei 64G 6888
6 |
--------------------------------------------------------------------------------
/conf_data/spark安装部署.txt:
--------------------------------------------------------------------------------
1 | Spark reference blog: http://blog.csdn.net/lovehuangjiaju
2 |
3 | Version: 2.2.0
4 | # Pre-built with user-provided Hadoop: the "Hadoop free" build, usable with any Hadoop version
5 | #https://www.apache.org/dyn/closer.lua/spark/spark-2.2.0/spark-2.2.0-bin-without-hadoop.tgz
6 |
7 | Pre-built for Apache Hadoop 2.7 and later
8 | https://www.apache.org/dyn/closer.lua/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
9 |
10 |
11 | Install Java==1.8.0_144 and Scala==2.11.11
12 | tar -xzvf spark-2.2.0-bin-hadoop2.7.tgz
13 | ln -sf spark-2.2.0-bin-hadoop2.7 spark
14 |
15 | Configure environment variables
16 | echo '
17 | # !!!No Modification, This Section is Auto Generated by Spark
18 | export SPARK_HOME=/home/xxproject/lib/spark
19 | export PATH=${PATH}:${SPARK_HOME}/bin
20 | ' >> ~/.bash_profile
21 | source ~/.bash_profile
22 |
23 | Configure the slaves file
24 | cd spark/conf/
25 | cp slaves.template slaves
26 | vi slaves
27 |
28 | # localhost
29 | node-01
30 | node-02
31 | node-03
32 | node-04
33 |
34 | Configure the Spark startup environment variables
35 | cp spark-env.sh.template spark-env.sh
36 | vi spark-env.sh
37 |
38 | export JAVA_HOME=/home/xxproject/lib/jdk
39 | # export SCALA_HOME=/home/xxproject/lib/scala
40 | export SPARK_MASTER_HOST=node-01
41 | export SPARK_MASTER_PORT=7077
42 | # export MASTER=spark://${SPARK_MASTER_HOST}:${SPARK_MASTER_PORT}
43 | export SPARK_WORKER_CORES=1
44 | # export SPARK_WORKER_INSTANCES=1
45 | export SPARK_WORKER_MEMORY=1g
46 | # export HADOOP_CONF_DIR=/opt/hadoop-2.7.3/etc/hadoop
47 | # export HADOOP_HOME=/home/hadoop/package/hadoop-2.7.2/etc/hadoop
48 | # export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath)
49 |
50 | Start the Spark cluster:
51 | sbin/start-all.sh
52 |
53 |
54 | Web UI: http://10.20.0.11:8080/
55 | Quick test:
56 | cd /home/xxproject/lib/spark
57 | spark-submit --class org.apache.spark.examples.SparkPi --master spark://node-01:7077 --executor-memory 1G --total-executor-cores 1 /home/xxproject/lib/spark/examples/jars/spark-examples_2.11-2.2.0.jar 100
58 |
59 | spark-shell test:
60 | spark-shell --master spark://node-01:7077 --executor-memory 1G --total-executor-cores 1
61 |
62 | val lines = sc.textFile("file:///home/xxproject/workspace/xxhadoop/spark_data/")
63 | val words = lines.flatMap(line => line.split(" ") )
64 | val wordCounts = words.map(word => (word, 1)).reduceByKey((a, b) => a + b)
65 | wordCounts.collect().foreach(println)
66 | wordCounts.partitions.length
67 | wordCounts.saveAsTextFile("file:///tmp/output")
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
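The spark-shell word count above can also be packaged as a standalone job. Below is a Java equivalent (the repo's own WordCount.scala is written in Scala and is not shown here), assuming the spark-core_2.11 2.2.0 dependency; submit it with spark-submit --master spark://node-01:7077 as in the example above:

```
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class JavaWordCount {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JavaWordCount"); // master comes from spark-submit
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<String> lines = sc.textFile("file:///home/xxproject/workspace/xxhadoop/spark_data/");
        JavaRDD<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
        JavaPairRDD<String, Integer> counts =
                words.mapToPair(word -> new Tuple2<>(word, 1)).reduceByKey((a, b) -> a + b);

        counts.collect().forEach(t -> System.out.println(t._1() + ": " + t._2()));
        sc.close();
    }
}
```
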
/conf_data/spark运行命令样例.txt:
--------------------------------------------------------------------------------
1 | Local (single-machine) mode:
2 | Output visible in the xshell terminal:
3 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master local[1] ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
4 |
5 | Standalone cluster mode:
6 | Required configuration
7 | 1. the slaves file
8 | 2, spark-env.sh
9 | export JAVA_HOME=/usr/soft/jdk1.7.0_71
10 | export SPARK_MASTER_IP=spark001
11 | export SPARK_MASTER_PORT=7077
12 | export SPARK_WORKER_CORES=1
13 | export SPARK_WORKER_INSTANCES=1
14 | export SPARK_WORKER_MEMORY=1g
15 |
16 | Standalone cluster mode,
17 | client deploy mode:
18 | Output visible in the xshell terminal:
19 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://spark001:7077 --executor-memory 1G --total-executor-cores 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
20 |
21 |
22 | Standalone cluster mode,
23 | cluster deploy mode:
24 | Output visible at spark001:8080!
25 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://spark001:7077 --deploy-mode cluster --supervise --executor-memory 1G --total-executor-cores 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
26 |
27 | YARN cluster mode:
28 | Required configuration
29 | 1, spark-env.sh
30 | export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
31 | export YARN_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
32 | export SPARK_HOME=/usr/hadoopsoft/spark-1.3.1-bin-hadoop2.4
33 | export SPARK_JAR=/usr/hadoopsoft/spark-1.3.1-bin-hadoop2.4/lib/spark-assembly-1.3.1-hadoop2.4.0.jar
34 | export PATH=$SPARK_HOME/bin:$PATH
35 | 2, ~/.bash_profile
36 | Configure the Hadoop environment variables
37 |
38 | YARN cluster mode,
39 | client mode:
40 | Output visible in the xshell terminal:
41 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-client --executor-memory 1G --num-executors 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
42 |
43 | YARN cluster mode,
44 | cluster mode:
45 | Output visible at spark001:8088!
46 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-cluster --executor-memory 1G --num-executors 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100
47 |
48 |
--------------------------------------------------------------------------------
/conf_data/storm安装配置/storm-trainning-v1.0-zs.ppt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/storm安装配置/storm-trainning-v1.0-zs.ppt
--------------------------------------------------------------------------------
/conf_data/storm安装配置/storm.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | ########### These MUST be filled in for a storm configuration
18 |
19 | storm.zookeeper.servers:
20 | - "node-01"
21 | - "node-02"
22 | - "node-0381"
23 | storm.zookeeper.port: 2181
24 | nimbus.seeds: ["node-01"]
25 |
26 | # ##### These may optionally be filled in:
27 | #
28 | ## List of custom serializations
29 | # topology.kryo.register:
30 | # - org.mycompany.MyType
31 | # - org.mycompany.MyType2: org.mycompany.MyType2Serializer
32 | #
33 | ## List of custom kryo decorators
34 | # topology.kryo.decorators:
35 | # - org.mycompany.MyDecorator
36 | #
37 | ## Locations of the drpc servers
38 | # drpc.servers:
39 | # - "server1"
40 | # - "server2"
41 |
42 | ## Metrics Consumers
43 | ## max.retain.metric.tuples
44 | ## - task queue will be unbounded when max.retain.metric.tuples is equal or less than 0.
45 | ## whitelist / blacklist
46 | ## - when none of configuration for metric filter are specified, it'll be treated as 'pass all'.
47 | ## - you need to specify either whitelist or blacklist, or none of them. You can't specify both of them.
48 | ## - you can specify multiple whitelist / blacklist with regular expression
49 | ## expandMapType: expand metric with map type as value to multiple metrics
50 | ## - set to true when you would like to apply filter to expanded metrics
51 | ## - default value is false which is backward compatible value
52 | ## metricNameSeparator: separator between origin metric name and key of entry from map
53 | ## - only effective when expandMapType is set to true
54 | # topology.metrics.consumer.register:
55 | # - class: "org.apache.storm.metric.LoggingMetricsConsumer"
56 | # max.retain.metric.tuples: 100
57 | # parallelism.hint: 1
58 | # - class: "org.mycompany.MyMetricsConsumer"
59 | # max.retain.metric.tuples: 100
60 | # whitelist:
61 | # - "execute.*"
62 | # - "^__complete-latency$"
63 | # parallelism.hint: 1
64 | # argument:
65 | # - endpoint: "metrics-collector.mycompany.org"
66 | # expandMapType: true
67 | # metricNameSeparator: "."
68 |
69 | ## Cluster Metrics Consumers
70 | # storm.cluster.metrics.consumer.register:
71 | # - class: "org.apache.storm.metric.LoggingClusterMetricsConsumer"
72 | # - class: "org.mycompany.MyMetricsConsumer"
73 | # argument:
74 | # - endpoint: "metrics-collector.mycompany.org"
75 | #
76 | # storm.cluster.metrics.consumer.publish.interval.secs: 60
--------------------------------------------------------------------------------
/conf_data/storm安装配置/storm安装手册及笔记.txt:
--------------------------------------------------------------------------------
1 | 1. Install a ZooKeeper cluster
2 |
3 | 2. Upload the Storm package and unpack it
4 | /home/xxproject/lib
5 | tar -xzvf apache-storm-1.1.1.tar.gz
6 | ln -sf apache-storm-1.1.1 storm
7 |
8 | 3. Edit the storm.yaml configuration file
9 | ======================================================
10 | storm.zookeeper.servers:
11 | - "node-01"
12 | - "node-02"
13 | - "node-0381"
14 | storm.zookeeper.port: 2181
15 | nimbus.seeds: ["node-01"]
16 | ======================================================
17 |
18 | # ZooKeeper cluster hosts used by Storm
19 | storm.zookeeper.servers:
20 | - "weekend05"
21 | - "weekend06"
22 | - "weekend07"
23 |
24 | # hostname of the nimbus node
25 | nimbus.host: "weekend05"
26 |
27 |
28 | Configure environment variables:
29 | echo '
30 | # !!!No Modification, This Section is Auto Generated by ZooKeeper
31 | export STORM_HOME=/home/xxproject/lib/storm
32 | export PATH=${PATH}:${STORM_HOME}/bin
33 | ' >> ~/.bash_profile
34 | source ~/.bash_profile
35 |
36 |
37 | This allows at most 5 worker processes to be started; the default is 4, so no adjustment is needed for now
38 | supervisor.slots.ports
39 | -6701
40 | -6702
41 | -6703
42 | -6704
43 | -6705
44 |
45 | Start Storm
46 | On the nimbus host (node-01):
47 | nohup storm nimbus 1>/dev/null 2>&1 &
48 | nohup storm ui 1>/dev/null 2>&1 &
49 |
50 | Visit: http://10.20.0.11:8080/index.html
51 |
52 | On the supervisor hosts (the two nodes node-02/3), start on each:
53 | nohup storm supervisor 1>/dev/null 2>&1 &
54 |
55 |
56 | Deeper Storm topics to study:
57 | Implementing distributed shared locks
58 | Implementation mechanism and development model of transactional topologies
59 | Integration with other frameworks in concrete scenarios (flume/activeMQ/kafka (a distributed message queue)/redis/hbase/mysql cluster)
60 |
61 | Open issues:
62 | Transaction support
63 |
64 |
65 |
66 |
67 | Submit a topology:
68 | storm jar storm.jar com.xcompany.xproject.storm.TestTopo
69 | storm list
70 | storm kill brandNameTopo
71 |
72 |
73 |
74 |
75 |
--------------------------------------------------------------------------------
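The notes end with storm jar storm.jar com.xcompany.xproject.storm.TestTopo, but the topology source itself only ships inside the zipped archives (storm.zip) and is not expanded in this export. As a reference point, a minimal Storm 1.1.x topology with one spout and one bolt looks roughly like this; all names here are illustrative:

```
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

public class TestTopoSketch {

    public static class WordSpout extends BaseRichSpout {
        private SpoutOutputCollector collector;
        private final String[] words = {"hello", "storm", "kafka"};
        private int i = 0;

        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
        }
        public void nextTuple() {
            Utils.sleep(500);                                    // emit one word every 500 ms
            collector.emit(new Values(words[i++ % words.length]));
        }
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word"));
        }
    }

    public static class PrintBolt extends BaseBasicBolt {
        public void execute(Tuple input, BasicOutputCollector collector) {
            System.out.println("got: " + input.getStringByField("word"));
        }
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
        }
    }

    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("word-spout", new WordSpout(), 1);
        builder.setBolt("print-bolt", new PrintBolt(), 2).shuffleGrouping("word-spout");

        Config conf = new Config();
        if (args.length > 0) {
            // submitted to the cluster with: storm jar <jar> <main-class> <topology-name>
            StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
        } else {
            new LocalCluster().submitTopology("test-topo-local", conf, builder.createTopology());
        }
    }
}
```
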
/conf_data/udf.txt:
--------------------------------------------------------------------------------
1 | 1389990045,http://www.163.com,2000
2 | 1385566005,http://www.163.com,2000
3 | 1385566005,http://www.163.com,2000
4 | 1389990045,http://www.163.com,2000
5 | 1390876045,http://www.163.com,2000
6 | 1385566005,http://www.163.com,2000
7 | 1390876045,http://www.163.com,2000
8 | 1390876045,http://www.163.com,2000
9 | 1389990045,http://www.163.com,2000
10 |
11 | select myfunction(nbr),url,flow from t_flow;
12 |
13 |
14 | 1389990045 beijing http://www.163.com 2000
15 | 1385566005,beijing http://www.163.com 2000
16 | 1385566005,beijing http://www.163.com 2000
17 | 1389990045,tianjing,http://www.163.com,2000
18 | 1390876045,tianjing,http://www.163.com,2000
19 | 1385566005,tianjing,http://www.163.com,2000
20 | 1390876045,beijing,http://www.163.com,2000
21 | 1390876045,nanjing,http://www.163.com,2000
22 | 1389990045,nanjing,http://www.163.com,2000
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
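The select myfunction(nbr), url, flow from t_flow; line above relies on a custom Hive UDF that maps a phone number to an area, presumably the Phone2Area.java under hive/src, which is not shown in this export. Below is a sketch of what such a UDF typically looks like with the classic org.apache.hadoop.hive.ql.exec.UDF API; the prefix-to-area mapping is invented for illustration. It would be registered in Hive with add jar and create temporary function myfunction as '<class name>'.

```
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class Phone2AreaSketch extends UDF {
    private static final Map<String, String> AREA = new HashMap<String, String>();
    static {
        AREA.put("1389990", "beijing");   // hypothetical prefixes for illustration only
        AREA.put("1385566", "tianjing");
        AREA.put("1390876", "nanjing");
    }

    // Hive calls evaluate() once per row: select myfunction(nbr), url, flow from t_flow;
    public Text evaluate(Text nbr) {
        if (nbr == null) {
            return null;
        }
        String s = nbr.toString();
        String prefix = s.length() >= 7 ? s.substring(0, 7) : s;
        String area = AREA.get(prefix);
        return new Text(area == null ? "unknown" : area);
    }
}
```
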
/conf_data/udt.test.txt:
--------------------------------------------------------------------------------
1 | 1389990045,http://www.163.com,2000
2 | 1385566005,http://www.163.com,2000
3 | 1385566005,http://www.163.com,2000
4 | 1389990045,http://www.163.com,2000
5 | 1390876045,http://www.163.com,2000
6 | 1385566005,http://www.163.com,2000
7 | 1390876045,http://www.163.com,2000
8 | 1390876045,http://www.163.com,2000
9 | 1389990045,http://www.163.com,2000
10 |
--------------------------------------------------------------------------------
/conf_data/word-count.txt:
--------------------------------------------------------------------------------
1 | hello world
2 | hello tom
3 | hello jim
4 | hello kitty
5 | hello baby
6 |
--------------------------------------------------------------------------------
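This file is the input for the mr/wordcount job. The repo's WordCountMapper/WordCountReducer sources are not shown in this export; the classic mapper/reducer pair for input like the lines above is sketched here for reference:

```
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountSketch {

    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split(" ")) {
                word.set(token);
                context.write(word, ONE);              // e.g. ("hello", 1)
            }
        }
    }

    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));  // e.g. ("hello", 5)
        }
    }
}
```
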
/conf_data/zoo.cfg:
--------------------------------------------------------------------------------
1 | # The number of milliseconds of each tick
2 | tickTime=2000
3 | # The number of ticks that the initial
4 | # synchronization phase can take
5 | initLimit=10
6 | # The number of ticks that can pass between
7 | # sending a request and getting an acknowledgement
8 | syncLimit=5
9 | # the directory where the snapshot is stored.
10 | # do not use /tmp for storage, /tmp here is just
11 | # example sakes.
12 | # dataDir=/tmp/zookeeper
13 | dataDir=/home/xxproject/data/zookeeper
14 | # the port at which the clients will connect
15 | clientPort=2181
16 | # the maximum number of client connections.
17 | # increase this if you need to handle more clients
18 | #maxClientCnxns=60
19 | #
20 | # Be sure to read the maintenance section of the
21 | # administrator guide before turning on autopurge.
22 | #
23 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
24 | #
25 | # The number of snapshots to retain in dataDir
26 | #autopurge.snapRetainCount=3
27 | # Purge task interval in hours
28 | # Set to "0" to disable auto purge feature
29 | #autopurge.purgeInterval=1
30 |
31 | # Added By ZooKeeper
32 | server.1=node-01:2888:3888
33 | server.2=node-02:2888:3888
34 | server.3=node-03:2888:3888
35 |
36 |
--------------------------------------------------------------------------------
/conf_data/zoo_sample.cfg:
--------------------------------------------------------------------------------
1 | # The number of milliseconds of each tick
2 | tickTime=2000
3 | # The number of ticks that the initial
4 | # synchronization phase can take
5 | initLimit=10
6 | # The number of ticks that can pass between
7 | # sending a request and getting an acknowledgement
8 | syncLimit=5
9 | # the directory where the snapshot is stored.
10 | # do not use /tmp for storage, /tmp here is just
11 | # example sakes.
12 | dataDir=/tmp/zookeeper
13 | # the port at which the clients will connect
14 | clientPort=2181
15 | # the maximum number of client connections.
16 | # increase this if you need to handle more clients
17 | #maxClientCnxns=60
18 | #
19 | # Be sure to read the maintenance section of the
20 | # administrator guide before turning on autopurge.
21 | #
22 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
23 | #
24 | # The number of snapshots to retain in dataDir
25 | #autopurge.snapRetainCount=3
26 | # Purge task interval in hours
27 | # Set to "0" to disable auto purge feature
28 | #autopurge.purgeInterval=1
29 |
--------------------------------------------------------------------------------
/conf_data/非HA场景/core-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- License header omitted in this export -->
<configuration>

    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node-01:9000</value>
        <description>The name of the default file system.</description>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/xxproject/data/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>

</configuration>
--------------------------------------------------------------------------------
/conf_data/非HA场景/hdfs-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- License header omitted in this export -->
<configuration>

    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>node-02:50090</value>
        <description>The secondary namenode http server address and port.</description>
    </property>
    <property>
        <name>dfs.namenode.secondary.https-address</name>
        <value>node-02:50091</value>
        <description>The secondary namenode HTTPS server address and port.</description>
    </property>

    <property>
        <name>dfs.namenode.http-address</name>
        <value>node-01:50070</value>
        <description>The address and the base port where the dfs namenode web ui will listen on.</description>
    </property>

    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>

</configuration>
--------------------------------------------------------------------------------
/conf_data/非HA场景/mapred-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- License header omitted in this export -->
<configuration>

    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
        <description>The runtime framework for executing MapReduce jobs. Can be one of local, classic or yarn.</description>
    </property>

    <property>
        <name>yarn.app.mapreduce.am.resource.mb</name>
        <value>1536</value>
        <description>The amount of memory the MR AppMaster needs.</description>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.resource.cpu-vcores</name>
        <value>1</value>
        <description>The number of virtual CPU cores the MR AppMaster needs.</description>
    </property>

    <!-- a commented-out section of the original file was not captured in this export -->
</configuration>
--------------------------------------------------------------------------------
/conf_data/非HA场景/masters:
--------------------------------------------------------------------------------
1 | node-02
2 |
--------------------------------------------------------------------------------
/conf_data/非HA场景/slaves:
--------------------------------------------------------------------------------
1 | node-02
2 | node-03
3 | node-04
4 |
5 |
--------------------------------------------------------------------------------
/conf_data/非HA场景/yarn-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<!-- License header omitted in this export -->
<configuration>

    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>node-01</value>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>node-01:8088</value>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.https.address</name>
        <value>node-01:8090</value>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>1536</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>1</value>
    </property>

    <!-- a commented-out section of the original file was not captured in this export -->
</configuration>
--------------------------------------------------------------------------------
/data_analyze.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/data_analyze.jpg
--------------------------------------------------------------------------------
/data_analyze.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/data_analyze.png
--------------------------------------------------------------------------------
/hadoop.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/hadoop.jpg
--------------------------------------------------------------------------------
/hbase/.classpath:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/hbase/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/hbase/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
    <name>hbase</name>
    <comment></comment>
    <projects>
    </projects>
    <buildSpec>
        <buildCommand>
            <name>org.eclipse.jdt.core.javabuilder</name>
            <arguments>
            </arguments>
        </buildCommand>
        <buildCommand>
            <name>org.eclipse.m2e.core.maven2Builder</name>
            <arguments>
            </arguments>
        </buildCommand>
    </buildSpec>
    <natures>
        <nature>org.eclipse.jdt.core.javanature</nature>
        <nature>org.eclipse.m2e.core.maven2Nature</nature>
    </natures>
</projectDescription>
--------------------------------------------------------------------------------
/hbase/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 |
--------------------------------------------------------------------------------
/hbase/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/hbase/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>com.xcompany.xproject</groupId>
        <artifactId>hadoop</artifactId>
        <version>1.0.0-RELEASE</version>
    </parent>

    <artifactId>hbase</artifactId>

    <dependencies>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.6</version>
        </dependency>
    </dependencies>
</project>
--------------------------------------------------------------------------------
/hbase/src/main/java/com/xcompany/xproject/hbase/App.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.hbase;
2 |
3 | /**
4 | * Hello world!
5 | *
6 | */
7 | public class App
8 | {
9 | public static void main( String[] args )
10 | {
11 | System.out.println( "Hello World!" );
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/hbase/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ### direct log messages to stdout ###
2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.Target = System.out
4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
6 |
7 | ### direct messages to file test.log ###
8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
9 | log4j.appender.file.File= ./log/hive.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 |
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hive.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 |
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 |
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false
28 |
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 |
--------------------------------------------------------------------------------
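With this configuration, any class in the module logs through log4j 1.2 roughly as sketched below: messages at info and above go to stdout plus the size-rolled ./log/hive.log and the daily-rolled ./logs/hive.log files named above. The demo class is illustrative:

```
import org.apache.log4j.Logger;

public class LogDemo {
    private static final Logger LOGGER = Logger.getLogger(LogDemo.class);

    public static void main(String[] args) {
        LOGGER.info("connection established");                       // written to stdout, file, dfile
        LOGGER.debug("not written at the default 'info' root level"); // suppressed by log4j.rootLogger=info
    }
}
```
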
/hbase/src/test/java/com/xcompany/xproject/hbase/AppTest.java:
--------------------------------------------------------------------------------
1 | //package com.xcompany.xproject.hbase;
2 | //
3 | //import junit.framework.Test;
4 | //import junit.framework.TestCase;
5 | //import junit.framework.TestSuite;
6 | //
7 | ///**
8 | // * Unit test for simple App.
9 | // */
10 | //public class AppTest
11 | // extends TestCase
12 | //{
13 | // /**
14 | // * Create the test case
15 | // *
16 | // * @param testName name of the test case
17 | // */
18 | // public AppTest( String testName )
19 | // {
20 | // super( testName );
21 | // }
22 | //
23 | // /**
24 | // * @return the suite of tests being tested
25 | // */
26 | // public static Test suite()
27 | // {
28 | // return new TestSuite( AppTest.class );
29 | // }
30 | //
31 | // /**
32 | // * Rigourous Test :-)
33 | // */
34 | // public void testApp()
35 | // {
36 | // assertTrue( true );
37 | // }
38 | //}
39 |
--------------------------------------------------------------------------------
/hdfs/.classpath:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/hdfs/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/hdfs/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>hdfs</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 
--------------------------------------------------------------------------------
/hdfs/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 |
--------------------------------------------------------------------------------
/hdfs/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/hdfs/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |   <parent>
4 |     <artifactId>hadoop</artifactId>
5 |     <groupId>com.xcompany.xproject</groupId>
6 |     <version>1.0.0-RELEASE</version>
7 |   </parent>
8 |   <modelVersion>4.0.0</modelVersion>
9 |   <artifactId>hdfs</artifactId>
10 |   <name>hdfs</name>
11 |   <dependencies>
12 |     <dependency>
13 |       <groupId>junit</groupId>
14 |       <artifactId>junit</artifactId>
15 |       <version>4.12</version>
16 |       <scope>test</scope>
17 |       <exclusions>
18 |         <exclusion>
19 |           <artifactId>hamcrest-core</artifactId>
20 |           <groupId>org.hamcrest</groupId>
21 |         </exclusion>
22 |       </exclusions>
23 |     </dependency>
24 |   </dependencies>
25 | </project>
26 | 
--------------------------------------------------------------------------------
/hdfs/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 | 	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 | 	<modelVersion>4.0.0</modelVersion>
6 | 
7 | 	<parent>
8 | 		<groupId>com.xcompany.xproject</groupId>
9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 
13 | 	<artifactId>hdfs</artifactId>
14 | 	<name>hdfs</name>
15 | 
16 | 	<dependencies>
17 | 		<dependency>
18 | 			<groupId>org.apache.hadoop</groupId>
19 | 			<artifactId>hadoop-client</artifactId>
20 | 			<version>2.7.4</version>
21 | 		</dependency>
22 | 	</dependencies>
23 | </project>
24 | 
--------------------------------------------------------------------------------
/hdfs/src/main/java/com/xcompany/xproject/hdfs/App.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.hdfs;
2 |
3 | /**
4 | * Hello world!
5 | *
6 | */
7 | public class App
8 | {
9 | public static void main( String[] args )
10 | {
11 | System.out.println( "Hello World!" );
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/hdfs/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ### direct log messages to stdout ###
2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.Target = System.out
4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
6 |
7 | ### direct messages to file test.log ###
8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
9 | log4j.appender.file.File= ./log/hdfs.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 |
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hdfs.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 |
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 |
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false
28 |
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 |
--------------------------------------------------------------------------------
/hdfs/src/test/java/com/xcompany/xproject/hdfs/AppTest.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.hdfs;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/hdfs/src/test/java/com/xcompany/xproject/hdfs/HDFSTest.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.hdfs;
2 |
3 | import java.io.FileInputStream;
4 | import java.io.FileNotFoundException;
5 | import java.io.FileOutputStream;
6 | import java.io.IOException;
7 |
8 | import org.apache.commons.io.IOUtils;
9 | import org.apache.hadoop.conf.Configuration;
10 | import org.apache.hadoop.fs.FSDataInputStream;
11 | import org.apache.hadoop.fs.FSDataOutputStream;
12 | import org.apache.hadoop.fs.FileSystem;
13 | import org.apache.hadoop.fs.LocatedFileStatus;
14 | import org.apache.hadoop.fs.Path;
15 | import org.apache.hadoop.fs.RemoteIterator;
16 | import org.junit.After;
17 | import org.junit.Before;
18 | import org.junit.Test;
19 | import org.slf4j.Logger;
20 | import org.slf4j.LoggerFactory;
21 |
22 | public class HDFSTest {
23 |
24 | private static final Logger LOGGER = LoggerFactory.getLogger(HDFSTest.class);
25 | private FileSystem fs = null;
26 |
27 | @Before
28 | public void setUp() throws IOException {
29 | Configuration conf = new Configuration();
30 | conf.set("fs.defaultFS", "hdfs://node-01:9000");
31 | fs = FileSystem.get(conf);
32 | }
33 |
34 | @After
35 | public void tearDown() throws IOException {
36 | fs.close();
37 | }
38 |
39 | @Test
40 | public void testList() throws FileNotFoundException, IOException {
41 | Path f = new Path("/");
42 | 		RemoteIterator<LocatedFileStatus> files = fs.listFiles(f, true);
43 | 		while (files.hasNext()) {
44 | 			LocatedFileStatus file = files.next();
45 | LOGGER.info("====={}", file.getPath());
46 | }
47 | }
48 |
49 | @Test
50 | public void testPut() throws IOException {
51 | Path f = new Path("/put-word-count.txt");
52 | FSDataOutputStream fsDataOutputStream = fs.create(f, true);
53 | FileInputStream fileInputStream = new FileInputStream("/home/xxproject/word-count.txt");
54 | IOUtils.copy(fileInputStream, fsDataOutputStream);
55 | }
56 |
57 | @Test
58 | public void testGet() throws IOException {
59 | 		Path f = new Path("/put-word-count.txt");
60 | FSDataInputStream fsDataInputStream = fs.open(f);
61 | FileOutputStream fileOutputStream = new FileOutputStream("/home/xxproject/get-word-count.txt");
62 | IOUtils.copy(fsDataInputStream, fileOutputStream);
63 | }
64 | }
65 |
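// A minimal usage sketch, assuming the NameNode is reachable at hdfs://node-01:9000
// and /home/xxproject/word-count.txt exists on the local machine:
//
//   mvn test -pl hdfs -Dtest=HDFSTest
//   hdfs dfs -ls /
//   hdfs dfs -cat /put-word-count.txt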
--------------------------------------------------------------------------------
/hive/.classpath:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/hive/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/hive/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>hive</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 
--------------------------------------------------------------------------------
/hive/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 |
--------------------------------------------------------------------------------
/hive/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/hive/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |   <parent>
4 |     <artifactId>hadoop</artifactId>
5 |     <groupId>com.xcompany.xproject</groupId>
6 |     <version>1.0.0-RELEASE</version>
7 |   </parent>
8 |   <modelVersion>4.0.0</modelVersion>
9 |   <artifactId>hive</artifactId>
10 |   <name>hive</name>
11 |   <build>
12 |     <finalName>${project.artifactId}-${project.version}</finalName>
13 |     <plugins>
14 |       <plugin>
15 |         <artifactId>maven-shade-plugin</artifactId>
16 |         <version>2.2</version>
17 |         <executions>
18 |           <execution>
19 |             <phase>package</phase>
20 |             <goals>
21 |               <goal>shade</goal>
22 |             </goals>
23 |             <configuration>
24 |               <filters>
25 |                 <filter>
26 |                   <artifact>*:*</artifact>
27 |                   <excludes>
28 |                     <exclude>META-INF/*.SF</exclude>
29 |                     <exclude>META-INF/*.DSA</exclude>
30 |                     <exclude>META-INF/*.RSA</exclude>
31 |                     <exclude>META-INF/MANIFEST.MF</exclude>
32 |                     <exclude>META-INF/log4j-provider.properties</exclude>
33 |                   </excludes>
34 |                 </filter>
35 |               </filters>
36 |               <transformers>
37 |                 <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
38 |                   <resource>META-INF/spring.handlers</resource>
39 |                 </transformer>
40 |                 <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
41 |                   <mainClass>com.xcompany.xproject.hive.Phone2Area</mainClass>
42 |                 </transformer>
43 |                 <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
44 |                   <resource>META-INF/spring.schemas</resource>
45 |                 </transformer>
46 |               </transformers>
47 |             </configuration>
48 |           </execution>
49 |         </executions>
50 |       </plugin>
51 |     </plugins>
52 |   </build>
53 |   <dependencies>
54 |     <dependency>
55 |       <groupId>junit</groupId>
56 |       <artifactId>junit</artifactId>
57 |       <version>4.12</version>
58 |       <scope>test</scope>
59 |       <exclusions>
60 |         <exclusion>
61 |           <artifactId>hamcrest-core</artifactId>
62 |           <groupId>org.hamcrest</groupId>
63 |         </exclusion>
64 |       </exclusions>
65 |     </dependency>
66 |   </dependencies>
67 | </project>
68 | 
--------------------------------------------------------------------------------
/hive/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 | 	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 | 	<modelVersion>4.0.0</modelVersion>
6 | 
7 | 	<parent>
8 | 		<groupId>com.xcompany.xproject</groupId>
9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 
13 | 	<artifactId>hive</artifactId>
14 | 
15 | 	<dependencies>
16 | 		<dependency>
17 | 			<groupId>org.apache.hive</groupId>
18 | 			<artifactId>hive-exec</artifactId>
19 | 			<version>2.1.1</version>
20 | 		</dependency>
21 | 		<dependency>
22 | 			<groupId>log4j</groupId>
23 | 			<artifactId>log4j</artifactId>
24 | 			<version>1.2.17</version>
25 | 		</dependency>
26 | 	</dependencies>
27 | 
28 | 	<build>
29 | 		<plugins>
30 | 			<plugin>
31 | 				<groupId>org.apache.maven.plugins</groupId>
32 | 				<artifactId>maven-shade-plugin</artifactId>
33 | 				<version>2.2</version>
34 | 				<executions>
35 | 					<execution>
36 | 						<phase>package</phase>
37 | 						<goals>
38 | 							<goal>shade</goal>
39 | 						</goals>
40 | 						<configuration>
41 | 							<filters>
42 | 								<filter>
43 | 									<artifact>*:*</artifact>
44 | 									<excludes>
45 | 										<exclude>META-INF/*.SF</exclude>
46 | 										<exclude>META-INF/*.DSA</exclude>
47 | 										<exclude>META-INF/*.RSA</exclude>
48 | 										<exclude>META-INF/MANIFEST.MF</exclude>
49 | 										<exclude>META-INF/log4j-provider.properties</exclude>
50 | 									</excludes>
51 | 								</filter>
52 | 							</filters>
53 | 							<transformers>
54 | 								<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
55 | 									<resource>META-INF/spring.handlers</resource>
56 | 								</transformer>
57 | 								<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
58 | 									<mainClass>com.xcompany.xproject.hive.Phone2Area</mainClass>
59 | 								</transformer>
60 | 								<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
61 | 									<resource>META-INF/spring.schemas</resource>
62 | 								</transformer>
63 | 							</transformers>
64 | 						</configuration>
65 | 					</execution>
66 | 				</executions>
67 | 			</plugin>
68 | 		</plugins>
69 | 		<finalName>${project.artifactId}-${project.version}</finalName>
70 | 	</build>
71 | </project>
72 | 
--------------------------------------------------------------------------------
/hive/src/main/java/com/xcompany/xproject/hive/Phone2Area.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.hive;
2 |
3 | import java.util.HashMap;
4 |
5 | import org.apache.hadoop.hive.ql.exec.UDF;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | /*
10 | * mvn clean --projects=com.xcompany.xproject:hive
11 | * mvn install --projects=com.xcompany.xproject:hive
12 | * java -jar hive/target/hive-1.0.0-RELEASE.jar
13 | */
14 | public class Phone2Area extends UDF {
15 |
16 | private static final Logger LOGGER = LoggerFactory.getLogger(Phone2Area.class);
17 |
18 | // Load Once, Speed Up
19 | 	private static HashMap<String, String> areaMap = new HashMap<String, String>();
20 |
21 | private static void loadData() {
22 | areaMap.put("135", "beijing");
23 | areaMap.put("136", "shanghai");
24 | areaMap.put("137", "xian");
25 | areaMap.put("138", "wuhan");
26 | }
27 |
28 | static {
29 | // System.setProperty("log4j2.loggerContextFactory", "org.apache.logging.log4j.core.impl.Log4jContextFactory");
30 | loadData();
31 | }
32 |
33 | public String evaluate(String phoneNum) {
34 | String preKey = phoneNum.substring(0,3);
35 | return (areaMap.get(preKey) == null) ? "other" : areaMap.get(preKey);
36 | }
37 |
38 | public static void main(String[] args) {
39 | Phone2Area phone2Area = new Phone2Area();
40 | LOGGER.error(phone2Area.evaluate("18665817689"));
41 | }
42 | }
43 |
44 |
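// A usage sketch for registering the UDF in Hive; the jar path and function name below
// are illustrative assumptions (the shaded jar name follows the finalName in pom.xml):
//
//   hive> ADD JAR /path/to/hive-1.0.0-RELEASE.jar;
//   hive> CREATE TEMPORARY FUNCTION phone2area AS 'com.xcompany.xproject.hive.Phone2Area';
//   hive> SELECT phone2area('13512345678');   -- "beijing" according to the map above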
--------------------------------------------------------------------------------
/hive/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ### direct log messages to stdout ###
2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.Target = System.out
4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
6 |
7 | ### direct messages to file test.log ###
8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
9 | log4j.appender.file.File= ./log/hive.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 |
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hive.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 |
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 |
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false
28 |
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 |
--------------------------------------------------------------------------------
/hive/src/test/java/com/xcompany/xproject/hive/AppTest.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.hive;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/kafka/.classpath:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/kafka/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/kafka/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>kafka</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 
--------------------------------------------------------------------------------
/kafka/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 |
--------------------------------------------------------------------------------
/kafka/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/kafka/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 | 	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 | 	<modelVersion>4.0.0</modelVersion>
6 | 
7 | 	<parent>
8 | 		<groupId>com.xcompany.xproject</groupId>
9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 
13 | 	<artifactId>kafka</artifactId>
14 | 
15 | 	<dependencies>
16 | 		<dependency>
17 | 			<groupId>org.apache.kafka</groupId>
18 | 			<artifactId>kafka_2.11</artifactId>
19 | 			<version>0.11.0.1</version>
20 | 		</dependency>
21 | 	</dependencies>
22 | </project>
23 | 
--------------------------------------------------------------------------------
/kafka/src/main/java/com/xcompany/xproject/kafka/TestConsumer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.kafka;
2 |
3 | import java.util.Arrays;
4 | import java.util.Properties;
5 |
6 | import org.apache.kafka.clients.consumer.Consumer;
7 | import org.apache.kafka.clients.consumer.ConsumerRecord;
8 | import org.apache.kafka.clients.consumer.ConsumerRecords;
9 | import org.apache.kafka.clients.consumer.KafkaConsumer;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 |
13 | public class TestConsumer {
14 |
15 | private static final Logger LOGGER = LoggerFactory.getLogger(TestConsumer.class);
16 |
17 | public static void main(String[] args) {
18 | Properties properties = new Properties();
19 | // bin/kafka-topics.sh
20 | properties.put("zookeeper.connect", "node-01:2181,node-02:2181,node-03:2181");
21 | // kafka-console-producer.sh
22 | properties.put("metadata.broker.list", "node-02:9092,node-03:9092,node-04:9092");
23 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
24 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
25 | // kafka-console-consumer.sh
26 | properties.put("bootstrap.servers", "node-02:9092,node-03:9092,node-04:9092");
27 |
28 | 		// must specify group.id
29 | properties.put("group.id", "test-group-new");
30 | properties.put("auto.offset.reset", "earliest");
31 |
32 | 		Consumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
33 | consumer.subscribe(Arrays.asList("order-r"));
34 | try {
35 | while (true) {
36 | 				ConsumerRecords<String, String> records = consumer.poll(1000); // ms
37 | 				for (ConsumerRecord<String, String> record : records) {
38 | LOGGER.info("offset = {}, key = {}, value = {}\n", record.offset(), record.key(), record.value());
39 | }
40 | }
41 | } catch (Exception e) {
42 | } finally {
43 | consumer.close();
44 | }
45 |
46 | }
47 |
48 | }
49 |
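// A setup sketch, assuming the ZooKeeper quorum used above; the partition and replication
// counts here are illustrative only:
//
//   bin/kafka-topics.sh --create \
//     --zookeeper node-01:2181,node-02:2181,node-03:2181 \
//     --replication-factor 2 --partitions 3 --topic order-r
//
// Run TestProducer first so this consumer has records to poll.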
--------------------------------------------------------------------------------
/kafka/src/main/java/com/xcompany/xproject/kafka/TestProducer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.kafka;
2 |
3 | import java.util.Properties;
4 |
5 | import org.apache.kafka.clients.producer.KafkaProducer;
6 | import org.apache.kafka.clients.producer.Producer;
7 | import org.apache.kafka.clients.producer.ProducerRecord;
8 | import org.slf4j.Logger;
9 | import org.slf4j.LoggerFactory;
10 |
11 |
12 | public class TestProducer {
13 |
14 | private static final Logger LOGGER = LoggerFactory.getLogger(TestProducer.class);
15 |
16 | public static void main(String[] args) {
17 | Properties properties = new Properties();
18 | // bin/kafka-topics.sh
19 | properties.put("zookeeper.connect", "node-01:2181,node-02:2181,node-03:2181");
20 | // kafka-console-producer.sh
21 | properties.put("metadata.broker.list", "node-02:9092,node-03:9092,node-04:9092");
22 | properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
23 | properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
24 | // kafka-console-consumer.sh
25 | properties.put("bootstrap.servers", "node-02:9092,node-03:9092,node-04:9092");
26 |
27 |
28 | 		Producer<String, String> producer = new KafkaProducer<String, String>(properties);
29 | 
30 | 		LOGGER.info("produce start...");
31 | 		for (int i = 0; i < 100; i++) {
32 | 			ProducerRecord<String, String> msg = new ProducerRecord<String, String>("order-r", "name", "Hello_XXX_" + i);
33 | producer.send(msg);
34 | }
35 | producer.close();
36 | LOGGER.info("produce end...");
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/kafka/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ### direct log messages to stdout ###
2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.Target = System.out
4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
6 |
7 | ### direct messages to file test.log ###
8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
9 | log4j.appender.file.File= ./log/kafka.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 |
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/kafka.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 |
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 |
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false
28 |
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 |
--------------------------------------------------------------------------------
/mmdetection/1-mmdection安装使用记录.txt:
--------------------------------------------------------------------------------
1 | # 0. Dependency versions for the mmdetection components
2 | # Reference: https://mmdetection.readthedocs.io/en/latest/get_started.html
3 | Linux or macOS (Windows is in experimental support)
4 | Python 3.6+:3.7.4
5 | PyTorch 1.3+:1.4
6 | CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible):10.1
7 | GCC 5+
8 | MMCV
9 |
10 | # 1. Reference: https://phoenixnap.com/kb/how-to-install-anaconda-ubuntu-18-04-or-20-04
11 | # curl -O https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh
12 | wget https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh
13 | bash Anaconda3-2020.02-Linux-x86_64.sh
14 |
15 | # 2. Create a conda virtual environment and activate it
16 | conda create -n open-mmlab python=3.7.4 -y
17 | conda activate open-mmlab
18 |
19 | # 3. Install PyTorch and torchvision following the official instructions
20 | # https://pytorch.org/get-started/locally/#windows-pip
21 | conda install pytorch=1.6.0 cudatoolkit=10.1 torchvision==0.7.0 -c pytorch -y
22 |
23 | import torch
24 | x = torch.rand(5, 3)
25 | print(x)
26 | import torch
27 | torch.cuda.is_available()
28 |
29 | # 4. Install mmcv-full; we recommend installing the pre-built package as below
30 | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
31 | # pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html
32 |
33 | # 5. Clone the MMDetection repository.
34 | sudo apt-get -y install build-essential nghttp2 libnghttp2-dev libssl-dev
35 | git clone https://github.com/open-mmlab/mmdetection.git
36 | cd mmdetection
37 | Or download directly: wget https://github.com/open-mmlab/mmdetection/archive/v2.10.0.zip
38 | unzip mmdetection-2.10.0.zip
39 | mv mmdetection-2.10.0 mmdetection
40 |
41 | # 6. Install build requirements and then install MMDetection.
42 | pip install -r requirements/build.txt
43 | pip install -v -e . # or "python setup.py develop"
44 |
45 | # 7. Install and use via a Docker image
46 | # Image registry: https://hub.docker.com/search?q=mmdetection&type=image
47 | # We provide a Dockerfile to build an image. Ensure that you are using docker version >=19.03.
48 | # build an image with PyTorch 1.6, CUDA 10.1
49 | docker build -t mmdetection docker/
50 | docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmdetection/data mmdetection
51 |
52 | # 8. Verify that the environment was installed successfully
53 | import torch
54 | available_gpus = [torch.cuda.get_device_properties(i) for i in range(torch.cuda.device_count())]
55 | available_gpus
56 |
57 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
58 | x = torch.tensor([1, 2, 3], device=device)
59 | print(x)
60 |
61 |
62 | from mmdet.apis import init_detector, inference_detector
63 | config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
64 | # download the checkpoint from model zoo and put it in `checkpoints/`
65 | # url: http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
66 | checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
67 | device = 'cuda:0'
68 | # init a detector
69 | model = init_detector(config_file, checkpoint_file, device=device)
70 | # inference the demo image
71 | inference_detector(model, 'demo/demo.jpg')
72 |
73 | # 9. Azure GPU server spec
74 | Spec: Standard_NC6s_v3
75 | CPU: 6 cores, RAM: 112 GB
76 | GPU: 1 card, GPU memory: 16 GB
77 |
78 | # Troubleshooting nvidia-smi errors
79 | dkms status
80 | sudo apt-get install dkms
81 | sudo dkms install -m nvidia -v 410.78
82 | nvidia-smi
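# A quick sanity check of the installed versions, assuming the open-mmlab env is active:
python -c "import torch, mmcv, mmdet; print(torch.__version__, mmcv.__version__, mmdet.__version__)"
python -c "import torch; print(torch.cuda.is_available())"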
--------------------------------------------------------------------------------
/mmdetection/2-mmdection预测新数据.txt:
--------------------------------------------------------------------------------
1 | # Install a new kernel for the notebook
2 | # Reference: https://docs.microsoft.com/zh-cn/azure/machine-learning/how-to-run-jupyter-notebooks
3 | conda install pip -y
4 | conda install notebook ipykernel -y
5 | python -m ipykernel install --user --name open-mmlab --display-name "Python (open-mmlab)"
6 |
7 | # Download the pretrained model weights
8 | cd mmdetection
9 | mkdir -p checkpoints/
10 | wget http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
11 | conda activate open-mmlab
12 |
13 | # Run the following code
14 | from mmdet.apis import init_detector, inference_detector
15 | import mmcv
16 |
17 | # Specify the path to model config and checkpoint file
18 | config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
19 | checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
20 |
21 | # build the model from a config file and a checkpoint file
22 | model = init_detector(config_file, checkpoint_file, device='cuda:0')
23 |
24 | # test a single image and show the results
25 | img = 'demo/demo.jpg' # or img = mmcv.imread(img), which will only load it once
26 | result = inference_detector(model, img)
27 | # visualize the results in a new window
28 | model.show_result(img, result)
29 | # or save the visualization results to image files
30 | model.show_result(img, result, out_file='result.jpg')
31 |
32 | # test a video and show the results
33 | video = mmcv.VideoReader('demo/demo.mp4')
34 | for frame in video:
35 | result = inference_detector(model, frame)
36 | model.show_result(frame, result, wait_time=1)
37 |
38 | # CPU version for the AML notebook
39 | from mmdet.apis import init_detector, inference_detector, show_result_pyplot
40 | import mmcv
41 |
42 | # Specify the path to model config and checkpoint file
43 | config_file = 'mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
44 | checkpoint_file = 'mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
45 |
46 | # build the model from a config file and a checkpoint file
47 | # model = init_detector(config_file, checkpoint_file, device='cuda:0')
48 | model = init_detector(config_file, checkpoint_file, device='cpu') # switched from the default GPU to CPU
49 |
50 |
51 |
52 | # test a single image and show the results
53 | img = 'mmdetection/demo/demo.jpg' # or img = mmcv.imread(img), which will only load it once
54 | result = inference_detector(model, img)
55 | # visualize the results in a new window
56 | model.show_result(img, result)
57 | # or save the visualization results to image files
58 | model.show_result(img, result, out_file='mmdetection/result/result.jpg')
59 |
60 | # show the results
61 | show_result_pyplot(model, img, result)
62 |
63 |
64 | # test a video and show the results
65 | video = mmcv.VideoReader('mmdetection/demo/demo.mp4')
66 | total_frame = 0
67 | for frame in video:
68 | result = inference_detector(model, frame)
69 | # show the results
70 | show_result_pyplot(model, frame, result)
71 | model.show_result(frame, result, wait_time=1)
72 | total_frame += 1
73 | print(total_frame)
74 |
75 |
76 | !cd mmdetection \
77 | && /anaconda/envs/open-mmlab/bin/python demo/image_demo.py \
78 | demo/demo.jpg \
79 | configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py \
80 | checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
81 | --device cpu
82 |
--------------------------------------------------------------------------------
/mmdetection/3-mmdection模型指标测试.txt:
--------------------------------------------------------------------------------
1 | # Download the VOC dataset (the 2007 set is used; example download commands below)
2 | # https://pjreddie.com/projects/pascal-voc-dataset-mirror/
3 | # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html
4 | # https://cocodataset.org/
5 | cd mmdetection
6 | mkdir data
7 |
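# Example download/extract commands for VOC2007 (mirror from the link above; the exact
# filenames are assumptions and may differ):
cd data
wget https://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
wget https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
tar xf VOCtrainval_06-Nov-2007.tar && tar xf VOCtest_06-Nov-2007.tar   # creates VOCdevkit/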
8 |
9 | # Download the model weights
10 | # https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc
11 | cd mmdetection
12 | mkdir -p checkpoints/
13 | wget http://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth
14 |
15 | # Test Faster R-CNN on PASCAL VOC (without saving the test results) and evaluate the mAP.
16 | # Config and checkpoint files are available here.
17 | !cd mmdetection \
18 | && /anaconda/envs/open-mmlab/bin/python tools/test.py \
19 | configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py \
20 | checkpoints/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth \
21 | --show-dir faster_rcnn_r50_fpn_1x_results/ \
22 | --eval mAP recall
23 |
24 |
25 | Quickly delete a large folder or large files
26 | mkdir -p blank
27 | rsync --delete-before -d blank/ VOCdevkit/
28 |
29 |
--------------------------------------------------------------------------------
/mmdetection/README:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/mr/.classpath:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/mr/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/mr/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>mr</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 
--------------------------------------------------------------------------------
/mr/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 |
--------------------------------------------------------------------------------
/mr/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/mr/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |   <parent>
4 |     <artifactId>hadoop</artifactId>
5 |     <groupId>com.xcompany.xproject</groupId>
6 |     <version>1.0.0-RELEASE</version>
7 |   </parent>
8 |   <modelVersion>4.0.0</modelVersion>
9 |   <artifactId>mr</artifactId>
10 |   <packaging>jar</packaging>
11 |   <dependencies>
12 |     <dependency>
13 |       <groupId>junit</groupId>
14 |       <artifactId>junit</artifactId>
15 |       <version>4.12</version>
16 |       <scope>test</scope>
17 |       <exclusions>
18 |         <exclusion>
19 |           <artifactId>hamcrest-core</artifactId>
20 |           <groupId>org.hamcrest</groupId>
21 |         </exclusion>
22 |       </exclusions>
23 |     </dependency>
24 |   </dependencies>
25 | </project>
26 | 
--------------------------------------------------------------------------------
/mr/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 | 	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 | 	<modelVersion>4.0.0</modelVersion>
6 | 
7 | 	<parent>
8 | 		<groupId>com.xcompany.xproject</groupId>
9 | 		<artifactId>hadoop</artifactId>
10 | 		<version>1.0.0-RELEASE</version>
11 | 	</parent>
12 | 
13 | 	<artifactId>mr</artifactId>
14 | 	<packaging>jar</packaging>
15 | 
16 | 	<dependencies>
17 | 		<dependency>
18 | 			<groupId>org.apache.hadoop</groupId>
19 | 			<artifactId>hadoop-client</artifactId>
20 | 			<version>2.7.4</version>
21 | 		</dependency>
22 | 	</dependencies>
23 | </project>
24 | 
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/App.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr;
2 |
3 | /**
4 | * Hello world!
5 | *
6 | */
7 | public class App
8 | {
9 | public static void main( String[] args )
10 | {
11 | System.out.println( "Hello World!" );
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowBean.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowpartition;
2 |
3 | import java.io.DataInput;
4 | import java.io.DataOutput;
5 | import java.io.IOException;
6 |
7 | import org.apache.hadoop.io.Writable;
8 |
9 | public class FlowBean implements Writable {
10 |
11 | private String phoneNum;
12 | private long upFlow;
13 | private long downFlow;
14 | private long sumFlow;
15 |
16 |
17 | public String getPhoneNum() {
18 | return phoneNum;
19 | }
20 | public void setPhoneNum(String phoneNum) {
21 | this.phoneNum = phoneNum;
22 | }
23 | public long getUpFlow() {
24 | return upFlow;
25 | }
26 | public void setUpFlow(long upFlow) {
27 | this.upFlow = upFlow;
28 | }
29 | public long getDownFlow() {
30 | return downFlow;
31 | }
32 | public void setDownFlow(long downFlow) {
33 | this.downFlow = downFlow;
34 | }
35 | public long getSumFlow() {
36 | return sumFlow;
37 | }
38 | public void setSumFlow(long sumFlow) {
39 | this.sumFlow = sumFlow;
40 | }
41 |
42 | // @Override
43 | // public String toString() {
44 | // return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow
45 | // + ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]";
46 | // }
47 | @Override
48 | public String toString() {
49 | return upFlow + "\t" + downFlow + "\t" + sumFlow;
50 | }
51 |
52 | public void write(DataOutput out) throws IOException {
53 | out.writeUTF(phoneNum);
54 | out.writeLong(upFlow);
55 | out.writeLong(downFlow);
56 | out.writeLong(sumFlow);
57 | }
58 | public void readFields(DataInput in) throws IOException {
59 | phoneNum = in.readUTF();
60 | upFlow = in.readLong();
61 | downFlow = in.readLong();
62 | sumFlow = in.readLong();
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartition.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowpartition;
2 |
3 | import java.util.HashMap;
4 |
5 | import org.apache.hadoop.mapreduce.Partitioner;
6 |
7 |
8 | public class FlowPartition<K, V> extends Partitioner<K, V> {
9 | 
10 | 	// Load Once, Speed Up
11 | 	private static HashMap<String, Integer> partitionMap = new HashMap<String, Integer>();
12 | private static void loadData() {
13 | partitionMap.put("135", 0);
14 | partitionMap.put("136", 1);
15 | partitionMap.put("137", 2);
16 | partitionMap.put("138", 3);
17 | }
18 | static {
19 | loadData();
20 | }
21 |
22 | @Override
23 | public int getPartition(K key, V value, int numPartitions) {
24 | //return 0;
25 | String preKey = key.toString().substring(0,3);
26 | return (partitionMap.get(preKey) == null) ? 4 : partitionMap.get(preKey);
27 | }
28 | }
29 |
30 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionJob.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowpartition;
2 |
3 | import java.util.Date;
4 |
5 | import org.apache.hadoop.conf.Configuration;
6 | import org.apache.hadoop.conf.Configured;
7 | import org.apache.hadoop.fs.Path;
8 | import org.apache.hadoop.io.Text;
9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.apache.hadoop.util.Tool;
14 | import org.apache.hadoop.util.ToolRunner;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 |
18 | public class FlowPartitionJob extends Configured implements Tool {
19 |
20 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowPartitionJob.class);
21 |
22 | public static void main(String[] args) throws Exception {
23 |
24 | Date startTime = new Date();
25 | LOGGER.info("==========job started: " + startTime);
26 | int res = ToolRunner.run(new Configuration(), new FlowPartitionJob(), args);
27 | Date endTime = new Date();
28 | LOGGER.info("==========job ended: " + endTime);
29 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
30 | System.exit(res);
31 | }
32 |
33 | public int run(String[] args) throws Exception {
34 |
35 | /*Configuration conf = getConf();
36 | JobClient client = new JobClient(conf);
37 | ClusterStatus cluster = client.getClusterStatus();
38 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
39 | String join_reduces = conf.get(REDUCES_PER_HOST);
40 | if (join_reduces != null) {
41 | num_reduces = cluster.getTaskTrackers() *
42 | Integer.parseInt(join_reduces);
43 | }
44 | // Set user-supplied (possibly default) job configs
45 | job.setNumReduceTasks(num_reduces);*/
46 |
47 |
48 | Configuration conf = new Configuration();
49 | //conf.set("fs.defaultFS", "hdfs://node-01:9000");
50 | String[] otherArgs = new GenericOptionsParser(conf, args)
51 | .getRemainingArgs();
52 |
53 | String commaSeparatedPaths = null;
54 | String outputDir = null;
55 | if (otherArgs.length == 2) {
56 | commaSeparatedPaths = otherArgs[0];
57 | outputDir = otherArgs[1];
58 | } else {
59 | 			System.err.println("Usage: <in>[,<in>...] <out>");
60 | //System.exit(-1);
61 | return -1;
62 | }
63 |
64 |
65 | Job job = Job.getInstance(conf);
66 | job.setJobName("FlowPartitionJob");
67 | job.setJarByClass(FlowPartitionJob.class);
68 |
69 | // job.setInputFormatClass(TextInputFormat.class);
70 | // job.setOutputFormatClass(TextOutputFormat.class);
71 |
72 | job.setMapperClass(FlowPartitionMapper.class);
73 | //job.setCombinerClass(WordCountReducer.class);
74 | job.setReducerClass(FlowPartitionReducer.class);
75 |
76 | job.setPartitionerClass(FlowPartition.class);
77 | job.setNumReduceTasks(5);
78 |
79 | job.setOutputKeyClass(Text.class);
80 | job.setOutputValueClass(FlowBean.class);
81 | job.setMapOutputKeyClass(Text.class);
82 | job.setMapOutputValueClass(FlowBean.class);
83 |
84 | FileInputFormat.setInputPaths(job, commaSeparatedPaths);
85 | FileOutputFormat.setOutputPath(job, new Path(outputDir));
86 |
87 | return job.waitForCompletion(true) ? 0 : 1;
88 | }
89 | }
90 |
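// A run sketch (jar name and HDFS paths are assumptions): after `mvn package -pl mr`,
// submit the job against input data already uploaded to HDFS, e.g.:
//
//   hadoop jar mr/target/mr-1.0.0-RELEASE.jar \
//     com.xcompany.xproject.mr.flowpartition.FlowPartitionJob \
//     /flow/input /flow/output-partition
//
// With setNumReduceTasks(5) above, part-r-00000..00003 hold the 135/136/137/138 prefixes
// and part-r-00004 collects everything else.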
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionMapper.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowpartition;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.LongWritable;
6 | import org.apache.hadoop.io.Text;
7 | import org.apache.hadoop.mapreduce.Mapper;
8 | import org.apache.hadoop.util.StringUtils;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | public class FlowPartitionMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
13 |
14 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowPartitionMapper.class);
15 |
16 | private String line = null;
17 | private final static char SEPARATOR = '\t';
18 |
19 | private String phoneNum = null;
20 | private long upFlow = 0;
21 | private long downFlow = 0;
22 | //private long sumFlow = 0;
23 |
24 | private Text text = new Text();
25 | private FlowBean flowBean = new FlowBean();
26 |
27 | @Override
28 | protected void map(LongWritable key, Text value,
29 | 			Mapper<LongWritable, Text, Text, FlowBean>.Context context)
30 | throws IOException, InterruptedException {
31 |
32 | //super.map(key, value, context);
33 | line = value.toString();
34 | String[] fields = StringUtils.split(line, SEPARATOR);
35 | if (fields.length != 11) {
36 | LOGGER.error("invalid line: {}", line);
37 | System.err.println("invalid line: " + line);
38 | } else {
39 | phoneNum = fields[1];
40 | upFlow = Long.parseLong(fields[8]);
41 | downFlow = Long.parseLong(fields[9]);
42 | flowBean.setPhoneNum(phoneNum);
43 | flowBean.setUpFlow(upFlow);
44 | flowBean.setDownFlow(downFlow);
45 | //sumFlow = upFlow + downFlow;
46 | flowBean.setSumFlow(upFlow + downFlow);
47 | text.set(phoneNum);
48 | context.write(text, flowBean);
49 | }
50 |
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionReducer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowpartition;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.Text;
6 | import org.apache.hadoop.mapreduce.Reducer;
7 |
8 | public class FlowPartitionReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
9 |
10 | private FlowBean result = new FlowBean();
11 |
12 | @Override
13 | 	protected void reduce(Text key, Iterable<FlowBean> values,
14 | 			Reducer<Text, FlowBean, Text, FlowBean>.Context context)
15 | throws IOException, InterruptedException {
16 |
17 | //super.reduce(arg0, arg1, arg2);
18 | long upFlow = 0;
19 | long downFlow = 0;
20 | //long flowSum = 0;
21 | for (FlowBean flowBean : values) {
22 | upFlow += flowBean.getUpFlow();
23 | downFlow += flowBean.getDownFlow();
24 | //flowSum += flowBean.getSumFlow();
25 | }
26 | result.setPhoneNum(key.toString());
27 | result.setUpFlow(upFlow);
28 | result.setDownFlow(downFlow);
29 | //result.setSumFlow(flowSum);
30 | result.setSumFlow(upFlow + downFlow);
31 | context.write(key, result);
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowBean.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowsort;
2 |
3 | import java.io.DataInput;
4 | import java.io.DataOutput;
5 | import java.io.IOException;
6 |
7 | import org.apache.hadoop.io.WritableComparable;
8 |
9 |
10 | public class FlowBean implements WritableComparable<FlowBean> {
11 |
12 | private String phoneNum;
13 | private long upFlow;
14 | private long downFlow;
15 | private long sumFlow;
16 |
17 |
18 | public String getPhoneNum() {
19 | return phoneNum;
20 | }
21 | public void setPhoneNum(String phoneNum) {
22 | this.phoneNum = phoneNum;
23 | }
24 | public long getUpFlow() {
25 | return upFlow;
26 | }
27 | public void setUpFlow(long upFlow) {
28 | this.upFlow = upFlow;
29 | }
30 | public long getDownFlow() {
31 | return downFlow;
32 | }
33 | public void setDownFlow(long downFlow) {
34 | this.downFlow = downFlow;
35 | }
36 | public long getSumFlow() {
37 | return sumFlow;
38 | }
39 | public void setSumFlow(long sumFlow) {
40 | this.sumFlow = sumFlow;
41 | }
42 |
43 | // @Override
44 | // public String toString() {
45 | // return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow
46 | // + ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]";
47 | // }
48 | @Override
49 | public String toString() {
50 | return phoneNum + "\t" + upFlow + "\t" + downFlow + "\t" + sumFlow;
51 | }
52 |
53 | public void write(DataOutput out) throws IOException {
54 | out.writeUTF(phoneNum);
55 | out.writeLong(upFlow);
56 | out.writeLong(downFlow);
57 | out.writeLong(sumFlow);
58 | }
59 | public void readFields(DataInput in) throws IOException {
60 | phoneNum = in.readUTF();
61 | upFlow = in.readLong();
62 | downFlow = in.readLong();
63 | sumFlow = in.readLong();
64 | }
65 | public int compareTo(FlowBean o) {
66 | //return 0;
67 | // DESC
68 | long thisValue = this.sumFlow;
69 | long thatValue = o.getSumFlow();
70 | return (thisValue < thatValue ? 1 : (thisValue == thatValue ? 0 : -1));
71 | }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortJob.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowsort;
2 |
3 | import java.util.Date;
4 |
5 | import org.apache.hadoop.conf.Configuration;
6 | import org.apache.hadoop.conf.Configured;
7 | import org.apache.hadoop.fs.Path;
8 | import org.apache.hadoop.io.NullWritable;
9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.apache.hadoop.util.Tool;
14 | import org.apache.hadoop.util.ToolRunner;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 |
18 | public class FlowSortJob extends Configured implements Tool {
19 |
20 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowSortJob.class);
21 |
22 | public static void main(String[] args) throws Exception {
23 |
24 | Date startTime = new Date();
25 | LOGGER.info("==========job started: " + startTime);
26 | int res = ToolRunner.run(new Configuration(), new FlowSortJob(), args);
27 | Date endTime = new Date();
28 | LOGGER.info("==========job ended: " + endTime);
29 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
30 | System.exit(res);
31 | }
32 |
33 | public int run(String[] args) throws Exception {
34 |
35 | /*Configuration conf = getConf();
36 | JobClient client = new JobClient(conf);
37 | ClusterStatus cluster = client.getClusterStatus();
38 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
39 | String join_reduces = conf.get(REDUCES_PER_HOST);
40 | if (join_reduces != null) {
41 | num_reduces = cluster.getTaskTrackers() *
42 | Integer.parseInt(join_reduces);
43 | }
44 | // Set user-supplied (possibly default) job configs
45 | job.setNumReduceTasks(num_reduces);*/
46 |
47 |
48 | Configuration conf = new Configuration();
49 | //conf.set("fs.defaultFS", "hdfs://node-01:9000");
50 | String[] otherArgs = new GenericOptionsParser(conf, args)
51 | .getRemainingArgs();
52 |
53 | String commaSeparatedPaths = null;
54 | String outputDir = null;
55 | if (otherArgs.length == 2) {
56 | commaSeparatedPaths = otherArgs[0];
57 | outputDir = otherArgs[1];
58 | } else {
59 | 			System.err.println("Usage: <in>[,<in>...] <out>");
60 | //System.exit(-1);
61 | return -1;
62 | }
63 |
64 |
65 | Job job = Job.getInstance(conf);
66 | job.setJobName("FlowSortJob");
67 | job.setJarByClass(FlowSortJob.class);
68 |
69 | job.setMapperClass(FlowSortMapper.class);
70 | //job.setCombinerClass(WordCountReducer.class);
71 | job.setReducerClass(FlowSortReducer.class);
72 |
73 | job.setOutputKeyClass(FlowBean.class);
74 | job.setOutputValueClass(NullWritable.class);
75 | job.setMapOutputKeyClass(FlowBean.class);
76 | job.setMapOutputValueClass(NullWritable.class);
77 |
78 | FileInputFormat.setInputPaths(job, commaSeparatedPaths);
79 | FileOutputFormat.setOutputPath(job, new Path(outputDir));
80 |
81 | return job.waitForCompletion(true) ? 0 : 1;
82 | }
83 | }
84 |
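// A run sketch (paths are assumptions): FlowSortJob expects the 4-column output produced
// by FlowSumJob (phone, upFlow, downFlow, sumFlow) and re-emits it sorted by sumFlow in
// descending order, e.g.:
//
//   hadoop jar mr/target/mr-1.0.0-RELEASE.jar \
//     com.xcompany.xproject.mr.flowsort.FlowSortJob \
//     /flow/output-sum /flow/output-sort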
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortMapper.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowsort;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.LongWritable;
6 | import org.apache.hadoop.io.NullWritable;
7 | import org.apache.hadoop.io.Text;
8 | import org.apache.hadoop.mapreduce.Mapper;
9 | import org.apache.hadoop.util.StringUtils;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 |
13 | public class FlowSortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable> {
14 |
15 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowSortMapper.class);
16 |
17 | private FlowBean flowBean = new FlowBean();
18 |
19 | private String line = null;
20 | private final static char SEPARATOR = '\t';
21 |
22 | private String phoneNum = null;
23 | private long upFlow = 0;
24 | private long downFlow = 0;
25 | private long sumFlow = 0;
26 |
27 | @Override
28 | protected void map(LongWritable key, Text value,
29 | 			Mapper<LongWritable, Text, FlowBean, NullWritable>.Context context)
30 | throws IOException, InterruptedException {
31 |
32 | //super.map(key, value, context);
33 | line = value.toString();
34 | String[] fields = StringUtils.split(line, SEPARATOR);
35 | if (fields.length != 4) {
36 | LOGGER.error("invalid line: {}", line);
37 | System.err.println("invalid line: " + line);
38 | } else {
39 | phoneNum = fields[0];
40 | upFlow = Long.parseLong(fields[1]);
41 | downFlow = Long.parseLong(fields[2]);
42 | sumFlow = Long.parseLong(fields[3]);
43 | flowBean.setPhoneNum(phoneNum);
44 | flowBean.setUpFlow(upFlow);
45 | flowBean.setDownFlow(downFlow);
46 | flowBean.setSumFlow(sumFlow);
47 | context.write(flowBean, NullWritable.get());
48 | }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortReducer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowsort;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.NullWritable;
6 | import org.apache.hadoop.mapreduce.Reducer;
7 |
8 | public class FlowSortReducer extends Reducer<FlowBean, NullWritable, FlowBean, NullWritable> {
9 |
10 | @Override
11 | 	protected void reduce(FlowBean key, Iterable<NullWritable> values,
12 | 			Reducer<FlowBean, NullWritable, FlowBean, NullWritable>.Context context)
13 | throws IOException, InterruptedException {
14 |
15 | //super.reduce(arg0, arg1, arg2);
16 | context.write(key, NullWritable.get());
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowBean.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowsum;
2 |
3 | import java.io.DataInput;
4 | import java.io.DataOutput;
5 | import java.io.IOException;
6 |
7 | import org.apache.hadoop.io.Writable;
8 |
9 | public class FlowBean implements Writable {
10 |
11 | private String phoneNum;
12 | private long upFlow;
13 | private long downFlow;
14 | private long sumFlow;
15 |
16 |
17 | public String getPhoneNum() {
18 | return phoneNum;
19 | }
20 | public void setPhoneNum(String phoneNum) {
21 | this.phoneNum = phoneNum;
22 | }
23 | public long getUpFlow() {
24 | return upFlow;
25 | }
26 | public void setUpFlow(long upFlow) {
27 | this.upFlow = upFlow;
28 | }
29 | public long getDownFlow() {
30 | return downFlow;
31 | }
32 | public void setDownFlow(long downFlow) {
33 | this.downFlow = downFlow;
34 | }
35 | public long getSumFlow() {
36 | return sumFlow;
37 | }
38 | public void setSumFlow(long sumFlow) {
39 | this.sumFlow = sumFlow;
40 | }
41 |
42 | // @Override
43 | // public String toString() {
44 | // return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow
45 | // + ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]";
46 | // }
47 | @Override
48 | public String toString() {
49 | return upFlow + "\t" + downFlow + "\t" + sumFlow;
50 | }
51 |
52 | public void write(DataOutput out) throws IOException {
53 | out.writeUTF(phoneNum);
54 | out.writeLong(upFlow);
55 | out.writeLong(downFlow);
56 | out.writeLong(sumFlow);
57 | }
58 | public void readFields(DataInput in) throws IOException {
59 | phoneNum = in.readUTF();
60 | upFlow = in.readLong();
61 | downFlow = in.readLong();
62 | sumFlow = in.readLong();
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumJob.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowsum;
2 |
3 | import java.util.Date;
4 |
5 | import org.apache.hadoop.conf.Configuration;
6 | import org.apache.hadoop.conf.Configured;
7 | import org.apache.hadoop.fs.Path;
8 | import org.apache.hadoop.io.Text;
9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.apache.hadoop.util.Tool;
14 | import org.apache.hadoop.util.ToolRunner;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 |
18 | public class FlowSumJob extends Configured implements Tool {
19 |
20 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowSumJob.class);
21 |
22 | public static void main(String[] args) throws Exception {
23 |
24 | Date startTime = new Date();
25 | LOGGER.info("==========job started: " + startTime);
26 | int res = ToolRunner.run(new Configuration(), new FlowSumJob(), args);
27 | Date endTime = new Date();
28 | LOGGER.info("==========job ended: " + endTime);
29 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
30 | System.exit(res);
31 | }
32 |
33 | public int run(String[] args) throws Exception {
34 |
35 | /*Configuration conf = getConf();
36 | JobClient client = new JobClient(conf);
37 | ClusterStatus cluster = client.getClusterStatus();
38 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
39 | String join_reduces = conf.get(REDUCES_PER_HOST);
40 | if (join_reduces != null) {
41 | num_reduces = cluster.getTaskTrackers() *
42 | Integer.parseInt(join_reduces);
43 | }
44 | // Set user-supplied (possibly default) job configs
45 | job.setNumReduceTasks(num_reduces);*/
46 |
47 |
48 | Configuration conf = new Configuration();
49 | //conf.set("fs.defaultFS", "hdfs://node-01:9000");
50 | String[] otherArgs = new GenericOptionsParser(conf, args)
51 | .getRemainingArgs();
52 |
53 | String commaSeparatedPaths = null;
54 | String outputDir = null;
55 | if (otherArgs.length == 2) {
56 | commaSeparatedPaths = otherArgs[0];
57 | outputDir = otherArgs[1];
58 | } else {
59 | 			System.err.println("Usage: <in>[,<in>...] <out>");
60 | //System.exit(-1);
61 | return -1;
62 | }
63 |
64 |
65 | Job job = Job.getInstance(conf);
66 | job.setJobName("FlowSumJob");
67 | job.setJarByClass(FlowSumJob.class);
68 |
69 | job.setMapperClass(FlowSumMapper.class);
70 | //job.setCombinerClass(WordCountReducer.class);
71 | job.setReducerClass(FlowSumReducer.class);
72 |
73 | job.setOutputKeyClass(Text.class);
74 | job.setOutputValueClass(FlowBean.class);
75 | job.setMapOutputKeyClass(Text.class);
76 | job.setMapOutputValueClass(FlowBean.class);
77 |
78 | FileInputFormat.setInputPaths(job, commaSeparatedPaths);
79 | FileOutputFormat.setOutputPath(job, new Path(outputDir));
80 |
81 | return job.waitForCompletion(true) ? 0 : 1;
82 | }
83 | }
84 |
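85 | // Example submission (the jar name and HDFS paths are assumptions; adjust to
86 | // the actual build artifact and data location):
87 | //   hadoop jar mr.jar com.xcompany.xproject.mr.flowsum.FlowSumJob \
88 | //       /flow-sum/input /flow-sum/output
89 | // Each output line is "phoneNum \t upFlow \t downFlow \t sumFlow", i.e. the
90 | // reducer key followed by FlowBean.toString().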
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumMapper.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowsum;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.LongWritable;
6 | import org.apache.hadoop.io.Text;
7 | import org.apache.hadoop.mapreduce.Mapper;
8 | import org.apache.hadoop.util.StringUtils;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
13 |
14 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowSumMapper.class);
15 |
16 | private String line = null;
17 | private final static char SEPARATOR = '\t';
18 |
19 | private String phoneNum = null;
20 | private long upFlow = 0;
21 | private long downFlow = 0;
22 | //private long sumFlow = 0;
23 |
24 | private Text text = new Text();
25 | private FlowBean flowBean = new FlowBean();
26 |
27 | @Override
28 | protected void map(LongWritable key, Text value,
29 | Mapper<LongWritable, Text, Text, FlowBean>.Context context)
30 | throws IOException, InterruptedException {
31 |
32 | //super.map(key, value, context);
33 | line = value.toString();
34 | String[] fields = StringUtils.split(line, SEPARATOR);
35 | if (fields.length != 11) {
36 | LOGGER.error("invalid line: {}", line);
37 | System.err.println("invalid line: " + line);
38 | } else {
39 | phoneNum = fields[1];
40 | upFlow = Long.parseLong(fields[8]);
41 | downFlow = Long.parseLong(fields[9]);
42 | flowBean.setPhoneNum(phoneNum);
43 | flowBean.setUpFlow(upFlow);
44 | flowBean.setDownFlow(downFlow);
45 | //sumFlow = upFlow + downFlow;
46 | flowBean.setSumFlow(upFlow + downFlow);
47 | text.set(phoneNum);
48 | context.write(text, flowBean);
49 | }
50 |
51 | }
52 | }
53 |
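54 | // Worked example for one map() call, on a hypothetical 11-field record
55 | // (field layout assumed from the indices used above: [1]=phoneNum,
56 | // [8]=upFlow, [9]=downFlow): if fields[1]="13726230503", fields[8]="2481"
57 | // and fields[9]="24681", the mapper emits key "13726230503" and a FlowBean
58 | // with upFlow=2481, downFlow=24681, sumFlow=27162.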
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumReducer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.flowsum;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.Text;
6 | import org.apache.hadoop.mapreduce.Reducer;
7 |
8 | public class FlowSumReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
9 |
10 | private FlowBean result = new FlowBean();
11 |
12 | @Override
13 | protected void reduce(Text key, Iterable<FlowBean> values,
14 | Reducer<Text, FlowBean, Text, FlowBean>.Context context)
15 | throws IOException, InterruptedException {
16 |
17 | //super.reduce(arg0, arg1, arg2);
18 | long upFlow = 0;
19 | long downFlow = 0;
20 | //long flowSum = 0;
21 | for (FlowBean flowBean : values) {
22 | upFlow += flowBean.getUpFlow();
23 | downFlow += flowBean.getDownFlow();
24 | //flowSum += flowBean.getSumFlow();
25 | }
26 | result.setPhoneNum(key.toString());
27 | result.setUpFlow(upFlow);
28 | result.setDownFlow(downFlow);
29 | //result.setSumFlow(flowSum);
30 | result.setSumFlow(upFlow + downFlow);
31 | context.write(key, result);
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneJob.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.invertedindex;
2 |
3 | import java.util.Date;
4 |
5 | import org.apache.hadoop.conf.Configuration;
6 | import org.apache.hadoop.conf.Configured;
7 | import org.apache.hadoop.fs.Path;
8 | import org.apache.hadoop.io.LongWritable;
9 | import org.apache.hadoop.io.Text;
10 | import org.apache.hadoop.mapreduce.Job;
11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
12 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
13 | import org.apache.hadoop.util.GenericOptionsParser;
14 | import org.apache.hadoop.util.Tool;
15 | import org.apache.hadoop.util.ToolRunner;
16 | import org.slf4j.Logger;
17 | import org.slf4j.LoggerFactory;
18 |
19 | public class StepOneJob extends Configured implements Tool {
20 |
21 | private static final Logger LOGGER = LoggerFactory.getLogger(StepOneJob.class);
22 |
23 | public static void main(String[] args) throws Exception {
24 |
25 | Date startTime = new Date();
26 | LOGGER.info("==========job started: " + startTime);
27 | int res = ToolRunner.run(new Configuration(), new StepOneJob(), args);
28 | Date endTime = new Date();
29 | LOGGER.info("==========job ended: " + endTime);
30 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
31 | System.exit(res);
32 | }
33 |
34 | public int run(String[] args) throws Exception {
35 |
36 | /*Configuration conf = getConf();
37 | JobClient client = new JobClient(conf);
38 | ClusterStatus cluster = client.getClusterStatus();
39 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
40 | String join_reduces = conf.get(REDUCES_PER_HOST);
41 | if (join_reduces != null) {
42 | num_reduces = cluster.getTaskTrackers() *
43 | Integer.parseInt(join_reduces);
44 | }
45 | // Set user-supplied (possibly default) job configs
46 | job.setNumReduceTasks(num_reduces);*/
47 |
48 |
49 | Configuration conf = new Configuration();
50 | //conf.set("fs.defaultFS", "hdfs://node-01:9000");
51 | String[] otherArgs = new GenericOptionsParser(conf, args)
52 | .getRemainingArgs();
53 |
54 | String commaSeparatedPaths = null;
55 | String outputDir = null;
56 | if (otherArgs.length == 2) {
57 | commaSeparatedPaths = otherArgs[0];
58 | outputDir = otherArgs[1];
59 | } else {
60 | System.err.println("Usage: <input_path>[,<input_path>...] <output_dir>");
61 | //System.exit(-1);
62 | return -1;
63 | }
64 |
65 |
66 | Job job = Job.getInstance(conf);
67 | job.setJobName("StepOneJob");
68 | job.setJarByClass(StepOneJob.class);
69 |
70 | // job.setInputFormatClass(TextInputFormat.class);
71 | // job.setOutputFormatClass(TextOutputFormat.class);
72 |
73 | job.setMapperClass(StepOneMapper.class);
74 | job.setCombinerClass(StepOneReducer.class);
75 | job.setReducerClass(StepOneReducer.class);
76 |
77 | // job.setPartitionerClass(FlowPartition.class);
78 | // job.setNumReduceTasks(5);
79 |
80 | job.setOutputKeyClass(Text.class);
81 | job.setOutputValueClass(LongWritable.class);
82 | job.setMapOutputKeyClass(Text.class);
83 | job.setMapOutputValueClass(LongWritable.class);
84 |
85 | FileInputFormat.setInputPaths(job, commaSeparatedPaths);
86 | FileOutputFormat.setOutputPath(job, new Path(outputDir));
87 |
88 | return job.waitForCompletion(true) ? 0 : 1;
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneMapper.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.invertedindex;
2 |
3 | import java.io.IOException;
4 | import java.util.StringTokenizer;
5 |
6 | import org.apache.hadoop.io.LongWritable;
7 | import org.apache.hadoop.io.Text;
8 | import org.apache.hadoop.mapreduce.Mapper;
9 | import org.apache.hadoop.mapreduce.lib.input.FileSplit;
10 |
11 | public class StepOneMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
12 |
13 | // private static final Logger LOGGER = LoggerFactory.getLogger(StepOneMapper.class);
14 |
15 | private final static char SEPARATOR = '\t';
16 |
17 | private Text text = new Text();
18 | private static final LongWritable ONE = new LongWritable(1L);
19 |
20 | @Override
21 | protected void map(LongWritable key, Text value,
22 | Mapper<LongWritable, Text, Text, LongWritable>.Context context)
23 | throws IOException, InterruptedException {
24 |
25 | //super.map(key, value, context);
26 | StringTokenizer itr = new StringTokenizer(value.toString());
27 | while (itr.hasMoreTokens()) {
28 | text.set(itr.nextToken() + SEPARATOR + ((FileSplit)context.getInputSplit()).getPath().getName());
29 | context.write(text, ONE);
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneReducer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.invertedindex;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.LongWritable;
6 | import org.apache.hadoop.io.Text;
7 | import org.apache.hadoop.mapreduce.Reducer;
8 |
9 | public class StepOneReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
10 |
11 | private LongWritable result = new LongWritable();
12 |
13 | @Override
14 | protected void reduce(Text key, Iterable<LongWritable> values,
15 | Reducer<Text, LongWritable, Text, LongWritable>.Context context)
16 | throws IOException, InterruptedException {
17 |
18 | //super.reduce(arg0, arg1, arg2);
19 | long count = 0;
20 | for (LongWritable value : values) {
21 | count += value.get();
22 | }
23 | result.set(count);
24 | context.write(key, result);
25 | }
26 | }
27 |
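28 | // Step-one sketch: the mapper keys every token with its source file name
29 | // ("word \t fileName") and this reducer (also used as the combiner) sums the
30 | // 1s, so a hypothetical a.txt containing "hello tom" and "hello jerry"
31 | // yields step-one output lines such as:
32 | //   hello\ta.txt   2
33 | //   jerry\ta.txt   1
34 | //   tom\ta.txt     1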
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoJob.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.invertedindex;
2 |
3 | import java.util.Date;
4 |
5 | import org.apache.hadoop.conf.Configuration;
6 | import org.apache.hadoop.conf.Configured;
7 | import org.apache.hadoop.fs.Path;
8 | import org.apache.hadoop.io.Text;
9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.apache.hadoop.util.Tool;
14 | import org.apache.hadoop.util.ToolRunner;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 |
18 | public class StepTwoJob extends Configured implements Tool {
19 |
20 | private static final Logger LOGGER = LoggerFactory.getLogger(StepTwoJob.class);
21 |
22 | public static void main(String[] args) throws Exception {
23 |
24 | Date startTime = new Date();
25 | LOGGER.info("==========job started: " + startTime);
26 | int res = ToolRunner.run(new Configuration(), new StepTwoJob(), args);
27 | Date endTime = new Date();
28 | LOGGER.info("==========job ended: " + endTime);
29 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds");
30 | System.exit(res);
31 | }
32 |
33 | public int run(String[] args) throws Exception {
34 |
35 | /*Configuration conf = getConf();
36 | JobClient client = new JobClient(conf);
37 | ClusterStatus cluster = client.getClusterStatus();
38 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
39 | String join_reduces = conf.get(REDUCES_PER_HOST);
40 | if (join_reduces != null) {
41 | num_reduces = cluster.getTaskTrackers() *
42 | Integer.parseInt(join_reduces);
43 | }
44 | // Set user-supplied (possibly default) job configs
45 | job.setNumReduceTasks(num_reduces);*/
46 |
47 |
48 | Configuration conf = new Configuration();
49 | //conf.set("fs.defaultFS", "hdfs://node-01:9000");
50 | String[] otherArgs = new GenericOptionsParser(conf, args)
51 | .getRemainingArgs();
52 |
53 | String commaSeparatedPaths = null;
54 | String outputDir = null;
55 | if (otherArgs.length == 2) {
56 | commaSeparatedPaths = otherArgs[0];
57 | outputDir = otherArgs[1];
58 | } else {
59 | System.err.println("Usage: <input_path>[,<input_path>...] <output_dir>");
60 | //System.exit(-1);
61 | return -1;
62 | }
63 |
64 |
65 | Job job = Job.getInstance(conf);
66 | job.setJobName("StepTwoJob");
67 | job.setJarByClass(StepTwoJob.class);
68 |
69 | // job.setInputFormatClass(TextInputFormat.class);
70 | // job.setOutputFormatClass(TextOutputFormat.class);
71 |
72 | job.setMapperClass(StepTwoMapper.class);
73 | // job.setCombinerClass(StepOneReducer.class);
74 | job.setReducerClass(StepTwoReducer.class);
75 |
76 | // job.setPartitionerClass(FlowPartition.class);
77 | // job.setNumReduceTasks(5);
78 |
79 | job.setOutputKeyClass(Text.class);
80 | job.setOutputValueClass(Text.class);
81 | job.setMapOutputKeyClass(Text.class);
82 | job.setMapOutputValueClass(Text.class);
83 |
84 | FileInputFormat.setInputPaths(job, commaSeparatedPaths);
85 | FileOutputFormat.setOutputPath(job, new Path(outputDir));
86 |
87 | return job.waitForCompletion(true) ? 0 : 1;
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoMapper.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.invertedindex;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.LongWritable;
6 | import org.apache.hadoop.io.Text;
7 | import org.apache.hadoop.mapreduce.Mapper;
8 | import org.apache.hadoop.util.StringUtils;
9 |
10 | public class StepTwoMapper extends Mapper<LongWritable, Text, Text, Text> {
11 |
12 | private Text textKey = new Text();
13 | private Text textValue = new Text();
14 |
15 | private final static char SEPARATOR = '\t';
16 | private String line = null;
17 | private String word = null;
18 | private String fileName = null;
19 | private String count = null;
20 |
21 | @Override
22 | protected void map(LongWritable key, Text value,
23 | Mapper<LongWritable, Text, Text, Text>.Context context)
24 | throws IOException, InterruptedException {
25 |
26 | //super.map(key, value, context);
27 | line = value.toString();
28 | String[] splits = StringUtils.split(line, SEPARATOR);
29 | word = splits[0];
30 | fileName = splits[1];
31 | count = splits[2];
32 | textKey.set(word);
33 | textValue.set(fileName + SEPARATOR + count);
34 | context.write(textKey, textValue);
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoReducer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.invertedindex;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.Text;
6 | import org.apache.hadoop.mapreduce.Reducer;
7 | import org.apache.hadoop.util.StringUtils;
8 |
9 | public class StepTwoReducer extends Reducer<Text, Text, Text, Text> {
10 |
11 | private Text result = new Text();
12 |
13 | @Override
14 | protected void reduce(Text key, Iterable<Text> values,
15 | Reducer<Text, Text, Text, Text>.Context context) throws IOException,
16 | InterruptedException {
17 |
18 | //super.reduce(arg0, arg1, arg2);
19 | result.set(StringUtils.join(";", values));
20 | context.write(key, result);
21 | }
22 | }
23 |
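24 | // Step-two sketch: the mapper re-keys each step-one line by word and this
25 | // reducer joins the "fileName \t count" values with ';', so the final
26 | // inverted index looks like (hypothetical counts):
27 | //   hello   a.txt\t2;b.txt\t2;c.txt\t1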
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountJob.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.wordcount;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.conf.Configuration;
6 | import org.apache.hadoop.fs.Path;
7 | import org.apache.hadoop.io.LongWritable;
8 | import org.apache.hadoop.io.Text;
9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12 | import org.apache.hadoop.util.GenericOptionsParser;
13 | import org.slf4j.Logger;
14 | import org.slf4j.LoggerFactory;
15 |
16 | /*
17 | * conf: copy the hadoop conf files to the src/main/resources dir, or execute the jar on a hadoop node
18 | * export: wordcount.jar
19 | * example: hadoop jar wordcount.jar com.xcompany.xproject.mr.wordcount.WordCountJob /word-count/input /word-count/output
20 | */
21 | public class WordCountJob {
22 |
23 | private static final Logger LOGGER = LoggerFactory
24 | .getLogger(WordCountJob.class);
25 |
26 | public static void main(String[] args) throws IOException,
27 | ClassNotFoundException, InterruptedException {
28 |
29 | Configuration conf = new Configuration();
30 | //conf.set("fs.defaultFS", "hdfs://node-01:9000");
31 | String[] otherArgs = new GenericOptionsParser(conf, args)
32 | .getRemainingArgs();
33 |
34 | String commaSeparatedPaths = null;
35 | String outputDir = null;
36 | if (otherArgs.length == 2) {
37 | commaSeparatedPaths = otherArgs[0];
38 | outputDir = otherArgs[1];
39 | } else {
40 | System.err.println("Usage: <input_path>[,<input_path>...] <output_dir>");
41 | System.exit(-1);
42 | }
43 |
44 | LOGGER.info("==========job start");
45 | Job job = Job.getInstance(conf);
46 | job.setJobName("WordCountJob");
47 | job.setJarByClass(WordCountJob.class);
48 |
49 | job.setMapperClass(WordCountMapper.class);
50 | job.setCombinerClass(WordCountReducer.class);
51 | job.setReducerClass(WordCountReducer.class);
52 |
53 | job.setOutputKeyClass(Text.class);
54 | job.setOutputValueClass(LongWritable.class);
55 | job.setMapOutputKeyClass(Text.class);
56 | job.setMapOutputValueClass(LongWritable.class);
57 |
58 | FileInputFormat.setInputPaths(job, commaSeparatedPaths);
59 | FileOutputFormat.setOutputPath(job, new Path(outputDir));
60 |
61 | if (job.waitForCompletion(true)) {
62 | LOGGER.info("==========job success");
63 | } else {
64 | LOGGER.info("==========job failed");
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountMapper.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.wordcount;
2 |
3 | import java.io.IOException;
4 | import java.util.StringTokenizer;
5 |
6 | import org.apache.hadoop.io.LongWritable;
7 | import org.apache.hadoop.io.Text;
8 | import org.apache.hadoop.mapreduce.Mapper;
9 |
10 | /*
11 | * http://blog.csdn.net/boonya/article/details/54959393
12 | * http://blog.csdn.net/guoery/article/details/8529004
13 | * LongWritable: byte offset of the line in the input file (input key)
14 | * Text : LineString
15 | * Text : OutKey
16 | * LongWritable: OutValue
17 | */
18 | public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
19 |
20 | private final static LongWritable ONE = new LongWritable(1L);
21 | private Text word = new Text();
22 |
23 | @Override
24 | protected void map(LongWritable key, Text value,
25 | Mapper<LongWritable, Text, Text, LongWritable>.Context context)
26 | throws IOException, InterruptedException {
27 |
28 | //super.map(key, value, context);
29 | StringTokenizer itr = new StringTokenizer(value.toString());
30 | while (itr.hasMoreTokens()) {
31 | word.set(itr.nextToken());
32 | context.write(word, ONE);
33 |
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountReducer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr.wordcount;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.io.LongWritable;
6 | import org.apache.hadoop.io.Text;
7 | import org.apache.hadoop.mapreduce.Reducer;
8 |
9 | public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
10 |
11 | private LongWritable result = new LongWritable();
12 |
13 | @Override
14 | protected void reduce(Text key, Iterable<LongWritable> values,
15 | Reducer<Text, LongWritable, Text, LongWritable>.Context context)
16 | throws IOException, InterruptedException {
17 |
18 | //super.reduce(arg0, arg1, arg2);
19 | long count = 0;
20 | for (LongWritable value : values) {
21 | count += value.get();
22 | }
23 | result.set(count);
24 | context.write(key, result);
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/mr/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ### direct log messages to stdout ###
2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.Target = System.out
4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
6 |
7 | ### direct messages to file test.log ###
8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
9 | log4j.appender.file.File= ./log/hdfs.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 |
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hdfs.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 |
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 |
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false
28 |
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 |
--------------------------------------------------------------------------------
/mr/src/test/java/com/xcompany/xproject/mr/AppTest.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.mr;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigorous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 |
5 | com.xcompany.xproject
6 | hadoop
7 | pom
8 | 1.0.0-RELEASE
9 |
10 |
11 | 1.8
12 | 2.7.4
13 |
14 |
16 |
17 |
18 |
19 |
20 |
21 | junit
22 | junit
23 | 4.12
24 | test
25 |
26 |
36 |
37 |
38 |
39 |
73 |
74 |
75 | hdfs
76 | rpc
77 | mr
78 | hive
79 | hbase
80 | storm
81 | kafka
82 | storm-kafka
83 | scala
84 | spark
85 | sparkstreaming
86 | spark-streaming
87 |
88 |
89 |
--------------------------------------------------------------------------------
/rpc/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/rpc/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/rpc/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | rpc
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.wst.jsdt.core.javascriptValidator
10 |
11 |
12 |
13 |
14 | org.eclipse.jdt.core.javabuilder
15 |
16 |
17 |
18 |
19 | org.eclipse.wst.common.project.facet.core.builder
20 |
21 |
22 |
23 |
24 | org.eclipse.wst.validation.validationbuilder
25 |
26 |
27 |
28 |
29 | org.eclipse.m2e.core.maven2Builder
30 |
31 |
32 |
33 |
34 |
35 | org.eclipse.jem.workbench.JavaEMFNature
36 | org.eclipse.wst.common.modulecore.ModuleCoreNature
37 | org.eclipse.jdt.core.javanature
38 | org.eclipse.m2e.core.maven2Nature
39 | org.eclipse.wst.common.project.facet.core.nature
40 | org.eclipse.wst.jsdt.core.jsNature
41 |
42 |
43 |
--------------------------------------------------------------------------------
/rpc/.settings/.jsdtscope:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
4 | org.eclipse.jdt.core.compiler.compliance=1.5
5 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
8 | org.eclipse.jdt.core.compiler.source=1.5
9 |
--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.common.component:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.common.project.facet.core.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.jsdt.ui.superType.container:
--------------------------------------------------------------------------------
1 | org.eclipse.wst.jsdt.launching.baseBrowserLibrary
--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.jsdt.ui.superType.name:
--------------------------------------------------------------------------------
1 | Window
--------------------------------------------------------------------------------
/rpc/.settings/org.eclipse.wst.validation.prefs:
--------------------------------------------------------------------------------
1 | disabled=06target
2 | eclipse.preferences.version=1
3 |
--------------------------------------------------------------------------------
/rpc/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | hadoop
5 | com.xcompany.xproject
6 | 1.0.0-RELEASE
7 |
8 | 4.0.0
9 | rpc
10 |
11 |
12 | junit
13 | junit
14 | 4.12
15 | test
16 |
17 |
18 | hamcrest-core
19 | org.hamcrest
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/rpc/logs/hdfs.log:
--------------------------------------------------------------------------------
1 | [INFO ] 2017-12-08 10:31:32.239 [] [] [main] org.apache.hadoop.ipc.CallQueueManager.(CallQueueManager.java:57) Using callQueue: class java.util.concurrent.LinkedBlockingQueue queueCapacity: 100
2 | [INFO ] 2017-12-08 10:31:32.875 [] [] [Socket Reader #1 for port 8888] org.apache.hadoop.ipc.Server$Listener$Reader.run(Server.java:722) Starting Socket Reader #1 for port 8888
3 | [WARN ] 2017-12-08 10:31:33.401 [] [] [main] org.apache.hadoop.util.NativeCodeLoader.(NativeCodeLoader.java:62) Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
4 | [INFO ] 2017-12-08 10:31:33.470 [] [] [main] com.xcompany.xproject.rpc.HelloServer.main(HelloServer.java:32) Server start to listen on 8888
5 | [INFO ] 2017-12-08 10:31:33.486 [] [] [IPC Server listener on 8888] org.apache.hadoop.ipc.Server$Listener.run(Server.java:801) IPC Server listener on 8888: starting
6 | [INFO ] 2017-12-08 10:31:33.487 [] [] [IPC Server Responder] org.apache.hadoop.ipc.Server$Responder.run(Server.java:962) IPC Server Responder: starting
7 | [WARN ] 2017-12-08 10:31:50.842 [] [] [main] org.apache.hadoop.util.NativeCodeLoader.(NativeCodeLoader.java:62) Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
8 | [INFO ] 2017-12-08 10:31:51.734 [] [] [IPC Server handler 0 on 8888] com.xcompany.xproject.rpc.HelloServer.helloMethod(HelloServer.java:18) JunneYang
9 | [INFO ] 2017-12-08 10:31:51.761 [] [] [main] com.xcompany.xproject.rpc.HelloClient.testHello(HelloClient.java:34) Hello JunneYang
10 | [INFO ] 2017-12-08 10:31:51.763 [] [] [IPC Server handler 0 on 8888] com.xcompany.xproject.rpc.HelloServer.helloMethod(HelloServer.java:18) Ares
11 | [INFO ] 2017-12-08 10:31:51.766 [] [] [main] com.xcompany.xproject.rpc.HelloClient.testHello(HelloClient.java:36) Hello Ares
12 |
--------------------------------------------------------------------------------
/rpc/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 |
8 | com.xcompany.xproject
9 | hadoop
10 | 1.0.0-RELEASE
11 |
12 |
13 | rpc
14 | jar
15 |
16 |
17 |
18 | org.apache.hadoop
19 | hadoop-client
20 | 2.7.4
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/rpc/src/main/java/com/xcompany/xproject/rpc/App.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.rpc;
2 |
3 | /**
4 | * Hello world!
5 | *
6 | */
7 | public class App
8 | {
9 | public static void main( String[] args )
10 | {
11 | System.out.println( "Hello World!" );
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/rpc/src/main/java/com/xcompany/xproject/rpc/HelloClient.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.rpc;
2 |
3 | import java.io.IOException;
4 | import java.net.InetSocketAddress;
5 |
6 | import org.apache.hadoop.conf.Configuration;
7 | import org.apache.hadoop.ipc.RPC;
8 | import org.junit.After;
9 | import org.junit.Before;
10 | import org.junit.Test;
11 | import org.slf4j.Logger;
12 | import org.slf4j.LoggerFactory;
13 |
14 | public class HelloClient {
15 |
16 | private static final Logger LOGGER = LoggerFactory.getLogger(HelloClient.class);
17 |
18 | @Before
19 | public void setUp() {
20 | }
21 | @After
22 | public void tearDown() {
23 | }
24 |
25 | @Test
26 | public void testHello() throws IOException {
27 | String bindAddress = "node-01";
28 | int port = 8888;
29 | InetSocketAddress addr = new InetSocketAddress(bindAddress, port);
30 | HelloProtocol proxy = RPC.getProxy(
31 | HelloProtocol.class, HelloProtocol.versionID,
32 | addr, new Configuration());
33 | String resp = proxy.helloMethod("JunneYang");
34 | LOGGER.info(resp);
35 | resp = proxy.helloMethod("Ares");
36 | LOGGER.info(resp);
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/rpc/src/main/java/com/xcompany/xproject/rpc/HelloProtocol.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.rpc;
2 |
3 | public interface HelloProtocol {
4 |
5 | public static final long versionID = 1L;
6 | public String helloMethod(String name);
7 |
8 | }
9 |
--------------------------------------------------------------------------------
/rpc/src/main/java/com/xcompany/xproject/rpc/HelloServer.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.rpc;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.HadoopIllegalArgumentException;
6 | import org.apache.hadoop.conf.Configuration;
7 | import org.apache.hadoop.ipc.RPC;
8 | import org.apache.hadoop.ipc.RPC.Builder;
9 | import org.apache.hadoop.ipc.RPC.Server;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 |
13 | public class HelloServer implements HelloProtocol {
14 |
15 | private static final Logger LOGGER = LoggerFactory.getLogger(HelloServer.class);
16 |
17 | public String helloMethod(String name) {
18 | LOGGER.info(name);
19 | return "Hello " + name;
20 | }
21 |
22 | public static void main(String[] args) throws HadoopIllegalArgumentException, IOException {
23 | Configuration conf = new Configuration();
24 | Builder builder = new RPC.Builder(conf);
25 | String bindAddress = "node-01";
26 | int port = 8888;
27 | builder.setBindAddress(bindAddress)
28 | .setPort(port)
29 | .setProtocol(HelloProtocol.class)
30 | .setInstance(new HelloServer());
31 | Server server = builder.build();
32 | LOGGER.info("Server start to listen on " + port);
33 | server.start();
34 | }
35 |
36 | }
37 |
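38 | // Typical run: start HelloServer (it listens on node-01:8888), then execute
39 | // the HelloClient#testHello JUnit test; the client-side RPC proxy invokes
40 | // helloMethod remotely and logs the "Hello ..." responses, as captured in
41 | // rpc/logs/hdfs.log.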
--------------------------------------------------------------------------------
/rpc/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ### direct log messages to stdout ###
2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.Target = System.out
4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
6 |
7 | ### direct messages to file test.log ###
8 | log4j.appender.file = org.apache.log4j.RollingFileAppender
9 | log4j.appender.file.File= ./log/hdfs.log
10 | log4j.appender.file.Append = true
11 | log4j.appender.file.MaxFileSize = 1MB
12 | log4j.appender.file.MaxBackupIndex = 10
13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout
14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
15 |
16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender
17 | log4j.appender.dfile.File = ./logs/hdfs.log
18 | log4j.appender.dfile.Append = true
19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout
20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n
21 |
22 | ### set log levels - for more verbose logging change 'info' to 'debug' ###
23 |
24 | #log4j.logger.org.app=debug
25 | #log4j.logger.com.ares=debug, stdout, file, dfile
26 | #log4j.logger.com.xcloud=debug, stdout
27 | #log4j.additivity.com.ares=false
28 |
29 | # log4j.rootLogger=info, stdout
30 | log4j.rootLogger=info, stdout, file, dfile
31 |
--------------------------------------------------------------------------------
/rpc/src/test/java/com/xcompany/xproject/rpc/AppTest.java:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.rpc;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigorous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/scala/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/scala/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/scala/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | scala
4 |
5 |
6 |
7 |
8 |
9 | org.scala-ide.sdt.core.scalabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.m2e.core.maven2Nature
21 | org.scala-ide.sdt.core.scalanature
22 | org.eclipse.jdt.core.javanature
23 |
24 |
25 |
--------------------------------------------------------------------------------
/scala/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.5
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
13 | org.eclipse.jdt.core.compiler.source=1.5
14 |
--------------------------------------------------------------------------------
/scala/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/scala/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 |
8 | com.xcompany.xproject
9 | hadoop
10 | 1.0.0-RELEASE
11 |
12 |
13 | scala
14 |
15 |
16 |
17 | 2.11.11
18 |
19 |
20 |
21 |
22 |
27 |
28 | junit
29 | junit
30 | 4.13.1
31 | test
32 |
33 |
34 | org.specs
35 | specs
36 | 1.2.5
37 | test
38 |
39 |
40 |
41 |
42 |
47 |
62 |
67 |
72 |
73 |
74 |
75 |
76 |
77 | scala-tools.org
78 | Scala-Tools Maven2 Repository
79 | http://scala-tools.org/repo-releases
80 |
81 |
82 |
83 |
84 | scala-tools.org
85 | Scala-Tools Maven2 Repository
86 | http://scala-tools.org/repo-releases
87 |
88 |
89 |
90 | src/main/scala
91 | src/test/scala
92 |
93 |
94 | org.scala-tools
95 | maven-scala-plugin
96 |
98 |
99 | ${scala.version}
100 |
101 | -target:jvm-1.8
102 |
103 |
104 |
105 |
106 | maven-eclipse-plugin
107 |
108 | true
109 |
110 | ch.epfl.lamp.sdt.core.scalabuilder
111 |
112 |
113 | ch.epfl.lamp.sdt.core.scalanature
114 |
115 |
116 | org.eclipse.jdt.launching.JRE_CONTAINER
117 | ch.epfl.lamp.sdt.launching.SCALA_CONTAINER
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 | org.scala-tools
127 | maven-scala-plugin
128 |
129 | ${scala.version}
130 |
131 |
132 |
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/scala/src/main/scala/com/xcompany/xproject/scala/App.scala:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.scala
2 |
3 | /**
4 | * http://blog.csdn.net/wuyinxian/article/details/38727717
5 | * http://download.scala-ide.org/sdk/helium/e38/scala211/stable/site
6 | */
7 |
8 |
9 | object App {
10 | def main(args: Array[String]): Unit = {
11 | println("Hello World!");
12 | }
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/spark-streaming.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark-streaming.zip
--------------------------------------------------------------------------------
/spark/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/spark/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/spark/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | spark
4 |
5 |
6 |
7 |
8 |
9 | org.scala-ide.sdt.core.scalabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.scala-ide.sdt.core.scalanature
21 | org.eclipse.jdt.core.javanature
22 | org.eclipse.m2e.core.maven2Nature
23 |
24 |
25 |
--------------------------------------------------------------------------------
/spark/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3 | org.eclipse.jdt.core.compiler.compliance=1.5
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.5
6 |
--------------------------------------------------------------------------------
/spark/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427870000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427870000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427880000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427880000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427890000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427890000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427900000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427900000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427910000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427910000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427920000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427920000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427930000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427930000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427940000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427940000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427950000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427950000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/.checkpoint-1514427960000.crc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427960000.crc
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427870000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427870000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427880000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427880000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427890000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427890000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427900000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427900000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427910000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427910000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427920000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427920000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427930000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427930000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427940000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427940000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427950000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427950000
--------------------------------------------------------------------------------
/spark/checkpoint/checkpoint-1514427960000:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427960000
--------------------------------------------------------------------------------
/spark/checkpoint/receivedBlockMetadata/log-1514427870017-1514427930017:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/receivedBlockMetadata/log-1514427870017-1514427930017
--------------------------------------------------------------------------------
/spark/checkpoint/receivedBlockMetadata/log-1514427932107-1514427992107:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/receivedBlockMetadata/log-1514427932107-1514427992107
--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/App.scala:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.spark
2 |
3 | /**
4 | * Hello world!
5 | *
6 | */
7 | object App {
8 | def main(args: Array[String]): Unit = {
9 | println("Hello World!");
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/WordCount.scala:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.spark
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.SparkContext
5 | import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
6 |
7 | object WordCount {
8 | def main(args: Array[String]): Unit = {
9 | // println(args(0))
10 | // println("Hello World!")
11 | val conf = new SparkConf().setAppName("WordCount")
12 | // .setMaster("spark://node-01:7077")
13 | // .setMaster("local")
14 | // .set("spark.executor.memory", "1g")
15 | // .set("spark.cores.max", "1")
16 | val sc = new SparkContext(conf)
17 |
18 | val lines = sc.textFile("file:///home/xxproject/workspace/xxhadoop/spark_data/")
19 | val words = lines.flatMap(line => line.split(" "))
20 | val wordCounts = words.map(word => (word, 1)).reduceByKey((a, b) => a + b)
21 | wordCounts.collect().foreach(println)
22 | wordCounts.partitions.length
23 | // wordCounts.saveAsTextFile("file:///tmp/output")
24 |
25 | sc.stop()
26 | }
27 | }
28 |
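29 | // Example launch (the jar name is an assumption; the master URL mirrors the
30 | // commented setMaster above):
31 | //   spark-submit --master spark://node-01:7077 \
32 | //     --class com.xcompany.xproject.spark.WordCount spark.jar
33 | // It reads file:///home/xxproject/workspace/xxhadoop/spark_data/ and prints
34 | // the collected (word, count) pairs on the driver.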
--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/streaming/BroadcastWrapper.scala:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.spark.streaming
2 |
3 | import scala.collection.mutable
4 |
5 | import org.apache.spark.SparkContext
6 | import org.apache.spark.broadcast.Broadcast
7 | import java.io.ObjectInputStream
8 | import java.io.ObjectOutputStream
9 | import java.util.Calendar
10 | import java.text.SimpleDateFormat
11 |
12 | //http://spark.apache.org/docs/2.2.0/streaming-programming-guide.html#accumulators-broadcast-variables-and-checkpoints
13 | object BroadcastWrapper {
14 | @volatile private var instance: Broadcast[Map[String, String]] = null
15 | private val map = mutable.LinkedHashMap[String, String]()
16 |
17 | def getUpdateInfo(): Map[String, String] = {
18 | val jedis_driver = RedisClient.pool.getResource
19 | println("=====GET_DRIVER")
20 | val is_update = jedis_driver.lpop("is_update")
21 | println("is_update: " + is_update)
22 | println("=====READ_DRIVER")
23 |
24 | // if (null == is_update) {
25 | // rdd.sparkContext.broadcast(is_update)
26 | // }
27 |
28 | map += ("is_update" -> is_update)
29 | val broadcast_info = jedis_driver.get("broadcast_info")
30 | map += ("broadcast_info" -> broadcast_info)
31 | // return the pooled connection only after all reads are done
32 | jedis_driver.close()
33 | println("=====CLOSE_DRIVER")
34 | map.toMap
35 | }
36 |
37 | def getInstance(sc: SparkContext): Broadcast[Map[String, String]] = {
38 | if (instance == null) {
39 | synchronized {
40 | if (instance == null) {
41 | val updateInfo = getUpdateInfo()
42 | // https://www.jianshu.com/p/95896d06a94d
43 | if (Some(null) != updateInfo.get("is_update")) {
44 | instance = sc.broadcast(updateInfo)
45 | }
46 | }
47 | }
48 | }
49 | instance
50 | }
51 |
52 | def broadcastInfo(sc: SparkContext, blocking: Boolean = false): Unit = {
53 | val updateInfo = getUpdateInfo()
54 | // https://www.jianshu.com/p/95896d06a94d
55 | if (Some(null) != updateInfo.get("is_update")) {
56 | if (instance != null) {
57 | instance.unpersist(blocking)
58 | }
59 | instance = sc.broadcast(updateInfo)
60 | }
61 |
62 | // val calendar = Calendar.getInstance()
63 | // val date = calendar.getTime()
64 | // val format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss")
65 | // val dateFormat = format.format(date)
66 | // println("=====broadcat success: " + dateFormat)
67 | }
68 |
69 | // private def writeObject(out: ObjectOutputStream): Unit = {
70 | // out.writeObject(instance)
71 | // }
72 | //
73 | // private def readObject(in: ObjectInputStream): Unit = {
74 | // instance = in.readObject().asInstanceOf[Broadcast[Map[String, String]]]
75 | // }
76 |
77 | }
78 |
79 |
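80 | // Usage sketch (an assumption, following the streaming guide linked above):
81 | // refresh and read the broadcast once per batch, e.g.
82 | //   dstream.foreachRDD { rdd =>
83 | //     BroadcastWrapper.broadcastInfo(rdd.sparkContext)
84 | //     val info = BroadcastWrapper.getInstance(rdd.sparkContext)
85 | //     if (info != null) println(info.value.get("broadcast_info"))
86 | //   }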
--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/streaming/NetworkWordCount.scala:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.spark.streaming
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.streaming.Seconds
5 | import org.apache.spark.streaming.StreamingContext
6 | import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
7 |
8 | object NetworkWordCount {
9 | def main(args: Array[String]): Unit = {
10 | val conf = new SparkConf().setAppName("NetworkWordCount")
11 | // .setMaster("spark://node-01:7077")
12 | // .setMaster("local")
13 | // .set("spark.executor.memory", "1g")
14 | // .set("spark.cores.max", "1")
15 | val ssc = new StreamingContext(conf, Seconds(10))
16 | val lines = ssc.socketTextStream("node-01", 9999)
17 | val words = lines.flatMap(_.split(" "))
18 | val pairs = words.map(word => (word, 1))
19 | val wordCounts = pairs.reduceByKey(_ + _)
20 | wordCounts.print()
21 |
22 |
23 | wordCounts.foreachRDD { rdd =>
24 | rdd.foreachPartition { partitionOfRecords => {
25 | // val connection = ConnectionPool.getConnection()
26 | partitionOfRecords.foreach(record => {
27 | // val sql = "insert into streaming_itemcount(item,count) values('" + record._1 + "'," + record._2 + ")"
28 | // val stmt = connection.createStatement
29 | // stmt.executeUpdate(sql)
30 | println(record)
31 | })
32 | // ConnectionPool.returnConnection(connection)
33 | }}
34 | }
35 |
36 | ssc.start() // Start the computation
37 | ssc.awaitTermination() // Wait for the computation to terminate
38 | }
39 | }
40 |
41 |
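42 | // To try it out: start a text server with `nc -lk 9999` on node-01, then
43 | // submit this class with spark-submit (cluster details are assumptions);
44 | // every 10-second batch prints the word counts of whatever was typed into nc.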
--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/streaming/RedisClient.scala:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.spark.streaming
2 |
3 | import org.apache.commons.pool2.impl.GenericObjectPoolConfig
4 | import redis.clients.jedis.JedisPool
5 | import redis.clients.jedis.JedisPoolConfig
6 |
7 | //http://blog.csdn.net/qq_26525215/article/details/60466222
8 | //https://segmentfault.com/a/1190000005085077
9 | object RedisClient {
10 | val host = "node-04"
11 | val port = 63791
12 | val timeout = 50000
13 | val password = "123456"
14 | val database = 0
15 | val maxTotal = 10
16 | val maxIdle = 5
17 | val maxWaitMillis = timeout
18 | val testOnBorrow = true
19 |
20 | val config = new JedisPoolConfig
21 | config.setMaxTotal(maxTotal)
22 | config.setMaxIdle(maxIdle)
23 | config.setMaxWaitMillis(maxWaitMillis)
24 | config.setTestOnBorrow(testOnBorrow)
25 |
26 | // must be lazy
27 | lazy val pool = new JedisPool(config, host, port, timeout, password, database)
28 |
29 | lazy val hook = new Thread {
30 | override def run = {
31 | println("Execute hook thread: " + this)
32 | pool.destroy()
33 | }
34 | }
35 | sys.addShutdownHook(hook.run)
36 | }
37 |
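A minimal usage sketch for this pool: borrow a Jedis instance, run commands, and close it so it is handed back to the pool (Jedis 3.x behavior); the key "streaming:demo" is illustrative.

    val jedis = RedisClient.pool.getResource
    try {
      jedis.set("streaming:demo", "hello")
      println(jedis.get("streaming:demo")) // prints "hello"
    } finally {
      jedis.close() // returns the borrowed instance to the pool
    }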
--------------------------------------------------------------------------------
/spark/src/main/scala/com/xcompany/xproject/spark/streaming/WaitForReady.scala:
--------------------------------------------------------------------------------
1 | package com.xcompany.xproject.spark.streaming
2 |
3 | object WaitForReady {
4 | private val PREFIX = "streaming"
5 | private val IS_READY = PREFIX + ":is_ready"
6 |
7 | def waitForReady(): Unit = {
8 | val jedis_main = RedisClient.pool.getResource
9 | var is_ready = jedis_main.get(IS_READY)
10 |     println("initial is_ready: " + is_ready) // may be null until the flag is set
11 | while (null == is_ready) {
12 | println("is_ready: " + is_ready + ", continue waitFor...")
13 | Thread.sleep(5000)
14 |       is_ready = jedis_main.get(IS_READY)
15 | }
16 | jedis_main.close()
17 | println("is_ready: " + is_ready + ", start to submitJob...")
18 | }
19 |
20 | def main(args: Array[String]): Unit = {
21 | waitForReady()
22 | }
23 | }
24 |
25 |
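waitForReady only consumes the streaming:is_ready flag; whichever process prepares the data is expected to set it. A minimal sketch of that producer side, assuming it also goes through RedisClient:

    val jedis = RedisClient.pool.getResource
    try {
      // signal the waiting streaming job that it can submit
      jedis.set("streaming:is_ready", "true")
    } finally {
      jedis.close()
    }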
--------------------------------------------------------------------------------
/spark/src/test/scala/com/xcompany/xproject/spark/AppTest.scala:
--------------------------------------------------------------------------------
1 | //package com.xcompany.xproject.spark
2 | //
3 | //import org.junit._
4 | //import Assert._
5 | //
6 | //@Test
7 | //class AppTest {
8 | //
9 | // @Test
10 | // def testOK() = assertTrue(true)
11 | //
12 | //// @Test
13 | //// def testKO() = assertTrue(false)
14 | //
15 | //}
16 | //
17 | //
18 |
--------------------------------------------------------------------------------
/spark/src/test/scala/com/xcompany/xproject/spark/MySpec.scala:
--------------------------------------------------------------------------------
1 | //package com.xcompany.xproject.spark
2 | //
3 | //import org.specs._
4 | //import org.specs.runner.{ConsoleRunner, JUnit4}
5 | //
6 | //class MySpecTest extends JUnit4(MySpec)
7 | ////class MySpecSuite extends ScalaTestSuite(MySpec)
8 | //object MySpecRunner extends ConsoleRunner(MySpec)
9 | //
10 | //object MySpec extends Specification {
11 | // "This wonderful system" should {
12 | // "save the world" in {
13 | // val list = Nil
14 | // list must beEmpty
15 | // }
16 | // }
17 | //}
18 |
--------------------------------------------------------------------------------
/spark_data/a.txt:
--------------------------------------------------------------------------------
1 | hello tom
2 | hello jerry
3 | hello tom
4 |
--------------------------------------------------------------------------------
/spark_data/b.txt:
--------------------------------------------------------------------------------
1 | hello jerry
2 | hello jerry
3 | tom jerry
4 |
--------------------------------------------------------------------------------
/spark_data/c.txt:
--------------------------------------------------------------------------------
1 | hello jerry
2 | hello tom
3 |
--------------------------------------------------------------------------------
/sparkstreaming.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/sparkstreaming.zip
--------------------------------------------------------------------------------
/storm-kafka.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/storm-kafka.zip
--------------------------------------------------------------------------------
/storm.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/storm.zip
--------------------------------------------------------------------------------
/tensorflow/01-TemsorFlow的模块与API.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/01-TemsorFlow的模块与API.png
--------------------------------------------------------------------------------
/tensorflow/02-TensorFlow架构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/02-TensorFlow架构.png
--------------------------------------------------------------------------------
/tensorflow/02-TensorFlow核心基础知识.txt:
--------------------------------------------------------------------------------
1 | 1. Working with Tensors (values can be assigned via constant, Variable, or placeholder)
2 | import tensorflow as tf
3 | tf.constant("Hello, TensorFlow", dtype=tf.string)
4 | tf.constant([1, 2, 3, 4, 5], dtype=tf.int32)
5 | tf.Variable([[1, 2], [3, 4]], dtype=tf.int32)
6 | tf.zeros(shape=(2, 3, 4), dtype=tf.int32)
7 | tf.constant(0, shape=(2, 3, 4), dtype=tf.int32)
8 | a = tf.constant(0, shape=(2, 3, 4), dtype=tf.int32)
9 | tf.rank(a)
10 |
11 | 2. Working with Variables (before 2.x, variables are lazily initialized by default; from 2.x they are initialized eagerly;
12 | before 2.x, Saver is used to save and restore variables; from 2.x, Checkpoint is used)
13 | import tensorflow as tf
14 | W = tf.Variable(tf.random.normal(shape=[1, 10], mean=0, stddev=1))
15 | b = tf.Variable(tf.zeros([10]))
16 | print([W, b])
17 | b.assign(b + tf.constant(1.0, shape=[10]))
18 | checkpoint = tf.train.Checkpoint(W=W, b=b)
19 | checkpoint.save('./demo/demo-model')
20 | b.assign(b + tf.constant(1.0, shape=[10]))
21 | print(W, b)
22 | checkpoint.restore('./demo/demo-model-1')
23 | print(W, b)
24 |
25 |
26 | import tensorflow as tf
27 | tf.compat.v1.disable_eager_execution()
28 | W = tf.Variable(tf.random.normal(shape=[1, 10], mean=0, stddev=1))
29 | b = tf.Variable(tf.zeros([10]))
30 | print([W, b])
31 | sess = tf.compat.v1.Session()
32 | sess.run(tf.compat.v1.global_variables_initializer())
33 | sess.run([W, b])
34 | sess.run(tf.compat.v1.assign(b, b + tf.constant(1.0, shape=[10])))
35 | saver = tf.compat.v1.train.Saver({'W': W, 'b': b})
36 | saver.save(sess, './demo/demo-model', global_step=0)
37 | sess.run(tf.compat.v1.assign(b, b + tf.constant(1.0, shape=[10])))
38 | saver.restore(sess, './demo/demo-model-0')
39 | sess.run(b)
40 |
41 | 3. Placeholders and operations (placeholder has been removed in 2.x; in compat.v1 mode, define placeholders with placeholder and fill them via feed_dict)
42 | import tensorflow as tf
43 | tf.compat.v1.disable_eager_execution()
44 | a = tf.constant(123)
45 | b = tf.constant(456)
46 | x = tf.compat.v1.placeholder(tf.int16, shape=())
47 | y = tf.compat.v1.placeholder(tf.int16, shape=())
48 | add = tf.add(x, y)
49 | mul = tf.multiply(x, y)
50 | sess = tf.compat.v1.Session()
51 | sess.run(add, feed_dict={x: 10, y: 5})
52 | sess.run(mul, feed_dict={x: 2, y: 3})
53 |
54 | 4. Listing available devices
55 | import tensorflow as tf
56 | tf.config.list_physical_devices()
57 |
58 | from tensorflow.python.client import device_lib
59 | dl = device_lib.list_local_devices()
60 | print(dl)
61 |
62 | import tensorflow as tf
63 | print([tf.__version__, tf.test.is_gpu_available()])
64 |
--------------------------------------------------------------------------------
/tensorflow/1-机器学习基础.ipynb:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tensorflow/10-模型定义与查看.ipynb:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tensorflow/4-数据存储DataStore访问.ipynb:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tensorflow/5-注册数据集Dataset.ipynb:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tensorflow/MLOps流水线参考.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/MLOps流水线参考.png
--------------------------------------------------------------------------------
/tensorflow/README:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tensorflow/env/README:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tensorflow/env/aml-demo-conda-dependencies.yaml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tensorflow/requests_futures使用参考.txt:
--------------------------------------------------------------------------------
1 | from concurrent.futures import ThreadPoolExecutor
2 | from requests_futures.sessions import FuturesSession
3 | from concurrent.futures import as_completed
4 |
5 |
6 | session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))
7 |
8 | futures = [session.post('http://httpbin.org/post', json={"name": "helloworld-" + str(i)}, headers={"Content-Type": "application/json"}) for i in range(3)]
9 |
10 | for future in as_completed(futures):
11 | resp = future.result()
12 | print(resp.text)
13 |
14 |
--------------------------------------------------------------------------------
/tensorflow/src/README:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tensorflow/src/job-dist.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tensorflow/src/job.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tensorflow/src/train.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tensorflow/性能测试.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/性能测试.png
--------------------------------------------------------------------------------
/我的书签.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/我的书签.rar
--------------------------------------------------------------------------------