├── .classpath ├── .project ├── .settings ├── .jsdtscope ├── org.eclipse.jdt.core.prefs ├── org.eclipse.m2e.core.prefs ├── org.eclipse.wst.common.component ├── org.eclipse.wst.common.project.facet.core.xml ├── org.eclipse.wst.jsdt.ui.superType.container ├── org.eclipse.wst.jsdt.ui.superType.name └── org.eclipse.wst.validation.prefs ├── LICENSE ├── README.md ├── conf_data ├── HA高可用场景 │ └── HA部署.txt ├── HIVE安装.txt ├── HTTP_20130313143750.dat ├── ZK搭建步骤.txt ├── a.txt ├── b.txt ├── c.txt ├── flowsort-data ├── hadoop-env.sh ├── hbase安装配置 │ ├── backup-masters │ ├── core-site.xml │ ├── hbase shell.txt │ ├── hbase-env.sh │ ├── hbase-site.xml │ ├── hbase集群搭建.txt │ ├── hdfs-site.xml │ ├── regionservers │ └── 笔记.txt ├── hive HQL语法示例.txt ├── hive-default.xml.template ├── hive-site.xml ├── hive-udf.txt ├── hive.txt ├── hive安装-视频.txt ├── hive笔记.txt ├── kafka安装配置 │ ├── kafka安装 │ ├── kafka笔记.txt │ ├── server-1.properties │ ├── server-2.properties │ └── server-3.properties ├── order.txt ├── spark安装部署.txt ├── spark运行命令样例.txt ├── storm安装配置 │ ├── storm-trainning-v1.0-zs.ppt │ ├── storm.yaml │ └── storm安装手册及笔记.txt ├── udf.txt ├── udt.test.txt ├── word-count.txt ├── zoo.cfg ├── zoo_sample.cfg └── 非HA场景 │ ├── Hadoop搭建步骤-非HA场景.txt │ ├── core-site.xml │ ├── hdfs-site.xml │ ├── mapred-site.xml │ ├── masters │ ├── slaves │ └── yarn-site.xml ├── data_analyze.jpg ├── data_analyze.png ├── hadoop.jpg ├── hbase ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── xcompany │ │ │ └── xproject │ │ │ └── hbase │ │ │ ├── App.java │ │ │ └── HBaseTest.java │ └── resources │ │ └── log4j.properties │ └── test │ └── java │ └── com │ └── xcompany │ └── xproject │ └── hbase │ └── AppTest.java ├── hdfs ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── dependency-reduced-pom.xml ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── xcompany │ │ │ └── xproject │ │ │ └── hdfs │ │ │ └── App.java │ └── resources │ │ └── log4j.properties │ └── test │ └── java │ └── com │ └── xcompany │ └── xproject │ └── hdfs │ ├── AppTest.java │ └── HDFSTest.java ├── hive ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── dependency-reduced-pom.xml ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── xcompany │ │ │ └── xproject │ │ │ └── hive │ │ │ └── Phone2Area.java │ └── resources │ │ └── log4j.properties │ └── test │ └── java │ └── com │ └── xcompany │ └── xproject │ └── hive │ └── AppTest.java ├── kafka ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── xcompany │ │ └── xproject │ │ └── kafka │ │ ├── TestConsumer.java │ │ └── TestProducer.java │ └── resources │ └── log4j.properties ├── mmdetection ├── 1-mmdection安装使用记录.txt ├── 2-mmdection预测新数据.txt ├── 3-mmdection模型指标测试.txt └── README ├── mr ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── dependency-reduced-pom.xml ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── xcompany │ │ │ └── xproject │ │ │ └── mr │ │ │ ├── App.java │ │ │ ├── flowpartition │ │ │ ├── FlowBean.java │ │ │ ├── FlowPartition.java │ │ │ ├── FlowPartitionJob.java │ │ │ ├── FlowPartitionMapper.java │ │ │ └── FlowPartitionReducer.java │ │ │ ├── flowsort │ │ │ ├── FlowBean.java │ │ │ ├── FlowSortJob.java │ │ │ ├── FlowSortMapper.java │ │ │ └── FlowSortReducer.java │ │ │ ├── flowsum │ │ │ ├── FlowBean.java │ │ │ ├── FlowSumJob.java │ │ │ ├── FlowSumMapper.java │ │ │ └── FlowSumReducer.java │ │ │ ├── invertedindex │ │ │ ├── StepOneJob.java │ │ │ ├── StepOneMapper.java │ │ │ ├── StepOneReducer.java │ │ │ ├── StepTwoJob.java │ │ │ ├── StepTwoMapper.java │ │ │ └── StepTwoReducer.java │ │ │ └── wordcount │ │ │ ├── WordCountJob.java │ │ │ ├── WordCountMapper.java │ │ │ └── WordCountReducer.java │ └── resources │ │ └── log4j.properties │ └── test │ └── java │ └── com │ └── xcompany │ └── xproject │ └── mr │ └── AppTest.java ├── pom.xml ├── rpc ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── .jsdtscope │ ├── org.eclipse.jdt.core.prefs │ ├── org.eclipse.m2e.core.prefs │ ├── org.eclipse.wst.common.component │ ├── org.eclipse.wst.common.project.facet.core.prefs.xml │ ├── org.eclipse.wst.common.project.facet.core.xml │ ├── org.eclipse.wst.jsdt.ui.superType.container │ ├── org.eclipse.wst.jsdt.ui.superType.name │ └── org.eclipse.wst.validation.prefs ├── dependency-reduced-pom.xml ├── log │ └── hdfs.log ├── logs │ ├── hdfs.log │ └── hdfs.log.2017-11-06 ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── xcompany │ │ │ └── xproject │ │ │ └── rpc │ │ │ ├── App.java │ │ │ ├── HelloClient.java │ │ │ ├── HelloProtocol.java │ │ │ └── HelloServer.java │ └── resources │ │ └── log4j.properties │ └── test │ └── java │ └── com │ └── xcompany │ └── xproject │ └── rpc │ └── AppTest.java ├── scala ├── .cache ├── .cache-main ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.jdt.core.prefs │ ├── org.eclipse.m2e.core.prefs │ └── org.scala-ide.sdt.core.prefs ├── pom.xml └── src │ └── main │ └── scala │ └── com │ └── xcompany │ └── xproject │ └── scala │ └── App.scala ├── spark-streaming.zip ├── spark ├── .cache-main ├── .cache-tests ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── checkpoint │ ├── .checkpoint-1514427870000.crc │ ├── .checkpoint-1514427880000.crc │ ├── .checkpoint-1514427890000.crc │ ├── .checkpoint-1514427900000.crc │ ├── .checkpoint-1514427910000.crc │ ├── .checkpoint-1514427920000.crc │ ├── .checkpoint-1514427930000.crc │ ├── .checkpoint-1514427940000.crc │ ├── .checkpoint-1514427950000.crc │ ├── .checkpoint-1514427960000.crc │ ├── checkpoint-1514427870000 │ ├── checkpoint-1514427880000 │ ├── checkpoint-1514427890000 │ ├── checkpoint-1514427900000 │ ├── checkpoint-1514427910000 │ ├── checkpoint-1514427920000 │ ├── checkpoint-1514427930000 │ ├── checkpoint-1514427940000 │ ├── checkpoint-1514427950000 │ ├── checkpoint-1514427960000 │ └── receivedBlockMetadata │ │ ├── log-1514427870017-1514427930017 │ │ └── log-1514427932107-1514427992107 ├── pom.xml └── src │ ├── main │ └── scala │ │ └── com │ │ └── xcompany │ │ └── xproject │ │ └── spark │ │ ├── App.scala │ │ ├── WordCount.scala │ │ └── streaming │ │ ├── BroadcastWrapper.scala │ │ ├── KafkaWordCount.scala │ │ ├── NetworkWordCount.scala │ │ ├── RedisClient.scala │ │ └── WaitForReady.scala │ └── test │ └── scala │ └── com │ └── xcompany │ └── xproject │ └── spark │ ├── AppTest.scala │ └── MySpec.scala ├── spark_data ├── a.txt ├── b.txt └── c.txt ├── sparkstreaming.zip ├── storm-kafka.zip ├── storm.zip ├── tensorflow ├── 01-TemsorFlow的模块与API.png ├── 01-TensorFlow基本概念与HelloWorld.txt ├── 02-TensorFlow架构.png ├── 02-TensorFlow核心基础知识.txt ├── 03-MNIST手写体数据集训练.txt ├── 1-机器学习基础.ipynb ├── 10-模型定义与查看.ipynb ├── 11-AML本地交互式训练.ipynb ├── 12-AML远程单节点训练.ipynb ├── 13-AML远程分布式训练.ipynb ├── 2-MNIST手写体数据集.ipynb ├── 3-工作区workspace访问.ipynb ├── 4-数据存储DataStore访问.ipynb ├── 5-注册数据集Dataset.ipynb ├── 6-标记数据集Dataset.ipynb ├── 7-加载数据集Dataset.ipynb ├── 8-规范化数据集Dataset.ipynb ├── 9-统计分析数据集Dataset.ipynb ├── MLOps流水线参考.png ├── README ├── env │ ├── README │ └── aml-demo-conda-dependencies.yaml ├── requests_futures使用参考.txt ├── src │ ├── README │ ├── job-dist.py │ ├── job.py │ ├── train-dist.py │ └── train.py └── 性能测试.png └── 我的书签.rar /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | hadoop 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.wst.jsdt.core.javascriptValidator 10 | 11 | 12 | 13 | 14 | org.eclipse.jdt.core.javabuilder 15 | 16 | 17 | 18 | 19 | org.eclipse.wst.common.project.facet.core.builder 20 | 21 | 22 | 23 | 24 | org.eclipse.m2e.core.maven2Builder 25 | 26 | 27 | 28 | 29 | org.eclipse.wst.validation.validationbuilder 30 | 31 | 32 | 33 | 34 | 35 | org.eclipse.jem.workbench.JavaEMFNature 36 | org.eclipse.wst.common.modulecore.ModuleCoreNature 37 | org.eclipse.jdt.core.javanature 38 | org.eclipse.m2e.core.maven2Nature 39 | org.eclipse.wst.common.project.facet.core.nature 40 | org.eclipse.wst.jsdt.core.jsNature 41 | 42 | 43 | -------------------------------------------------------------------------------- /.settings/.jsdtscope: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | org.eclipse.jdt.core.compiler.compliance=1.8 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 13 | org.eclipse.jdt.core.compiler.source=1.8 14 | -------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /.settings/org.eclipse.wst.common.component: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.settings/org.eclipse.wst.common.project.facet.core.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.settings/org.eclipse.wst.jsdt.ui.superType.container: -------------------------------------------------------------------------------- 1 | org.eclipse.wst.jsdt.launching.baseBrowserLibrary -------------------------------------------------------------------------------- /.settings/org.eclipse.wst.jsdt.ui.superType.name: -------------------------------------------------------------------------------- 1 | Window -------------------------------------------------------------------------------- /.settings/org.eclipse.wst.validation.prefs: -------------------------------------------------------------------------------- 1 | disabled=06target 2 | eclipse.preferences.version=1 3 | -------------------------------------------------------------------------------- /conf_data/HA高可用场景/HA部署.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/HA高可用场景/HA部署.txt -------------------------------------------------------------------------------- /conf_data/HTTP_20130313143750.dat: -------------------------------------------------------------------------------- 1 | 1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200 2 | 1363157995052 13826544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 4 0 264 0 200 3 | 1363157991076 13926435656 20-10-7A-28-CC-0A:CMCC 120.196.100.99 2 4 132 1512 200 4 | 1363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 4 0 240 0 200 5 | 1363157993044 18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 iface.qiyi.com 视频网站 15 12 1527 2106 200 6 | 1363157995074 84138413 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 122.72.52.12 20 16 4116 1432 200 7 | 1363157993055 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 18 15 1116 954 200 8 | 1363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 sug.so.xxx.cn 信息安全 20 20 3156 2936 200 9 | 1363157983019 13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 4 0 240 0 200 10 | 1363157984041 13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 s19.cnzz.com 站点统计 24 9 6960 690 200 11 | 1363157973098 15013685858 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 rank.ie.sogou.com 搜索引擎 28 27 3659 3538 200 12 | 1363157986029 15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 www.umeng.com 站点统计 3 3 1938 180 200 13 | 1363157992093 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 15 9 918 4938 200 14 | 1363157986041 13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 3 3 180 180 200 15 | 1363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 2052.flash2-http.qq.com 综合门户 15 12 1938 2910 200 16 | 1363157995093 13922314466 00-FD-07-A2-EC-BA:CMCC 120.196.100.82 img.qfc.cn 12 12 3008 3720 200 17 | 1363157982040 13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 y0.ifengimg.com 综合门户 57 102 7335 110349 200 18 | 1363157986072 18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 input.shouji.sogou.com 搜索引擎 21 18 9531 2412 200 19 | 1363157990043 13925057413 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 t3.baidu.com 搜索引擎 69 63 11058 48243 200 20 | 1363157988072 13760778710 00-FD-07-A4-7B-08:CMCC 120.196.100.82 2 2 120 120 200 21 | 1363157985079 13823070001 20-7C-8F-70-68-1F:CMCC 120.196.100.99 6 3 360 180 200 22 | 1363157985069 13600217502 00-1F-64-E2-E8-B1:CMCC 120.196.100.55 18 138 1080 186852 200 23 | -------------------------------------------------------------------------------- /conf_data/ZK搭建步骤.txt: -------------------------------------------------------------------------------- 1 | 版本选择: http://blog.csdn.net/anningzhu/article/details/60468723 2 | http://hbase.apache.org/book.html#zookeeper.requirements 3 | 选择:zookeeper-3.4.9.tar.gz 4 | 5 | /home/xxproject/lib 6 | tar -xzvf zookeeper-3.4.9.tar.gz 7 | ln -sf zookeeper-3.4.9 zookeeper 8 | mkdir -p /home/xxproject/data/zookeeper 9 | zoo.cfg 配置拷贝到 /home/xxproject/lib/zookeeper-3.4.9/conf 下面 10 | 11 | echo ' 12 | # !!!No Modification, This Section is Auto Generated by ZooKeeper 13 | export ZK_HOME=/home/xxproject/lib/zookeeper 14 | export PATH=${PATH}:${ZK_HOME}/bin 15 | ' >> ~/.bash_profile 16 | source ~/.bash_profile 17 | 18 | 三台机器分别执行 19 | echo 1 > /home/xxproject/data/zookeeper/myid 20 | echo 2 > /home/xxproject/data/zookeeper/myid 21 | echo 3 > /home/xxproject/data/zookeeper/myid 22 | 23 | # 启动ZK 24 | #./zookeeper/bin/zkServer.sh start 25 | #./zookeeper/bin/zkServer.sh status 26 | # bin/zkCli.sh -server 127.0.0.1:2181 27 | zkServer.sh start 28 | zkServer.sh status 29 | # jps QuorumPeerMain 30 | -------------------------------------------------------------------------------- /conf_data/a.txt: -------------------------------------------------------------------------------- 1 | hello tom 2 | hello jerry 3 | hello tom 4 | -------------------------------------------------------------------------------- /conf_data/b.txt: -------------------------------------------------------------------------------- 1 | hello jerry 2 | hello jerry 3 | tom jerry 4 | -------------------------------------------------------------------------------- /conf_data/c.txt: -------------------------------------------------------------------------------- 1 | hello jerry 2 | hello tom 3 | -------------------------------------------------------------------------------- /conf_data/flowsort-data: -------------------------------------------------------------------------------- 1 | 13480253104 180 180 360 2 | 13502468823 7335 110349 117684 3 | 13560439658 2034 5892 7926 4 | 13600217502 1080 186852 187932 5 | 13602846565 1938 2910 4848 6 | 13660577991 6960 690 7650 7 | 13719199419 240 0 240 8 | 13726230503 2481 24681 27162 9 | 13760778710 120 120 240 10 | 13823070001 360 180 540 11 | 13826544101 264 0 264 12 | 13922314466 3008 3720 6728 13 | 13925057413 11058 48243 59301 14 | 13926251106 240 0 240 15 | 13926435656 132 1512 1644 16 | 15013685858 3659 3538 7197 17 | 15920133257 3156 2936 6092 18 | 15989002119 1938 180 2118 19 | 18211575961 1527 2106 3633 20 | 18320173382 9531 2412 11943 21 | 84138413 4116 1432 5548 22 | -------------------------------------------------------------------------------- /conf_data/hadoop-env.sh: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Set Hadoop-specific environment variables here. 18 | 19 | # The only required environment variable is JAVA_HOME. All others are 20 | # optional. When running a distributed configuration it is best to 21 | # set JAVA_HOME in this file, so that it is correctly defined on 22 | # remote nodes. 23 | 24 | # The java implementation to use. 25 | JAVA_HOME=/home/xxproject/lib/jdk 26 | export JAVA_HOME=${JAVA_HOME} 27 | 28 | # The jsvc implementation to use. Jsvc is required to run secure datanodes 29 | # that bind to privileged ports to provide authentication of data transfer 30 | # protocol. Jsvc is not required if SASL is configured for authentication of 31 | # data transfer protocol using non-privileged ports. 32 | #export JSVC_HOME=${JSVC_HOME} 33 | 34 | export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"} 35 | 36 | # Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. 37 | for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do 38 | if [ "$HADOOP_CLASSPATH" ]; then 39 | export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f 40 | else 41 | export HADOOP_CLASSPATH=$f 42 | fi 43 | done 44 | 45 | # The maximum amount of heap to use, in MB. Default is 1000. 46 | #export HADOOP_HEAPSIZE= 47 | #export HADOOP_NAMENODE_INIT_HEAPSIZE="" 48 | 49 | # Extra Java runtime options. Empty by default. 50 | export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true" 51 | 52 | # Command specific options appended to HADOOP_OPTS when specified 53 | export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS" 54 | export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS" 55 | 56 | export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS" 57 | 58 | export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS" 59 | export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS" 60 | 61 | # The following applies to multiple commands (fs, dfs, fsck, distcp etc) 62 | export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" 63 | #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" 64 | 65 | # On secure datanodes, user to run the datanode as after dropping privileges. 66 | # This **MUST** be uncommented to enable secure HDFS if using privileged ports 67 | # to provide authentication of data transfer protocol. This **MUST NOT** be 68 | # defined if SASL is configured for authentication of data transfer protocol 69 | # using non-privileged ports. 70 | export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER} 71 | 72 | # Where log files are stored. $HADOOP_HOME/logs by default. 73 | #export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER 74 | 75 | # Where log files are stored in the secure data environment. 76 | export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER} 77 | 78 | ### 79 | # HDFS Mover specific parameters 80 | ### 81 | # Specify the JVM options to be used when starting the HDFS Mover. 82 | # These options will be appended to the options specified as HADOOP_OPTS 83 | # and therefore may override any similar flags set in HADOOP_OPTS 84 | # 85 | # export HADOOP_MOVER_OPTS="" 86 | 87 | ### 88 | # Advanced Users Only! 89 | ### 90 | 91 | # The directory where pid files are stored. /tmp by default. 92 | # NOTE: this should be set to a directory that can only be written to by 93 | # the user that will run the hadoop daemons. Otherwise there is the 94 | # potential for a symlink attack. 95 | export HADOOP_PID_DIR=${HADOOP_PID_DIR} 96 | export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR} 97 | 98 | # A string representing this instance of hadoop. $USER by default. 99 | export HADOOP_IDENT_STRING=$USER 100 | -------------------------------------------------------------------------------- /conf_data/hbase安装配置/backup-masters: -------------------------------------------------------------------------------- 1 | node-03 2 | -------------------------------------------------------------------------------- /conf_data/hbase安装配置/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | fs.defaultFS 23 | hdfs://node-01:9000 24 | The name of the default file system. 25 | 26 | 27 | 28 | hadoop.tmp.dir 29 | /home/xxproject/data/hadoop/tmp 30 | A base for other temporary directories. 31 | 32 | 33 | -------------------------------------------------------------------------------- /conf_data/hbase安装配置/hbase shell.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hbase安装配置/hbase shell.txt -------------------------------------------------------------------------------- /conf_data/hbase安装配置/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | 26 | hbase.rootdir 27 | hdfs://node-01:9000/hbase 28 | 29 | 30 | 31 | hbase.cluster.distributed 32 | true 33 | 34 | 35 | 36 | hbase.zookeeper.quorum 37 | node-01:2181,node-02:2181,node-03:2181 38 | 39 | 40 | -------------------------------------------------------------------------------- /conf_data/hbase安装配置/hbase集群搭建.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hbase安装配置/hbase集群搭建.txt -------------------------------------------------------------------------------- /conf_data/hbase安装配置/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | dfs.namenode.secondary.http-address 23 | node-02:50090 24 | The secondary namenode http server address and port. 25 | 26 | 27 | dfs.namenode.secondary.https-address 28 | node-02:50091 29 | The secondary namenode HTTPS server address and port. 30 | 31 | 32 | 33 | dfs.namenode.http-address 34 | node-01:50070 35 | The address and the base port where the dfs namenode web ui will listen on. 36 | 37 | 38 | 39 | dfs.replication 40 | 3 41 | 42 | 43 | -------------------------------------------------------------------------------- /conf_data/hbase安装配置/regionservers: -------------------------------------------------------------------------------- 1 | node-02 2 | node-03 3 | node-04 4 | -------------------------------------------------------------------------------- /conf_data/hbase安装配置/笔记.txt: -------------------------------------------------------------------------------- 1 | cd /home/xxproject/lib 2 | tar -xzvf hbase-1.2.6-bin.tar.gz 3 | ln -sf hbase-1.2.6 hbase 4 | 5 | hbase-env.sh 中添加: 6 | export JAVA_HOME=/home/xxproject/lib/jdk 7 | export HBASE_MANAGES_ZK=false 8 | 9 | hdfs的 core-site.xml、hdfs-site.xml 拷贝到 hbase的配置文件目录---------因为hadoop不是ns的方式,所以应该是不需要的 10 | 11 | 修改hbase-site.xml配置如下: 12 | 13 | 14 | hbase.rootdir 15 | hdfs://node-01:9000/hbase 16 | 17 | 18 | 19 | hbase.cluster.distributed 20 | true 21 | 22 | 23 | 24 | hbase.zookeeper.quorum 25 | node-01:2181,node-02:2181,node-03:2181 26 | 27 | 28 | echo ' 29 | # !!!No Modification, This Section is Auto Generated by ZooKeeper 30 | export HBASE_HOME=/home/xxproject/lib/hbase 31 | export PATH=${PATH}:${HBASE_HOME}/bin 32 | ' >> ~/.bash_profile 33 | source ~/.bash_profile 34 | 35 | hmaster--regionserver--hmaster-backup 36 | HMaster执行: 37 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start master/hbase-daemon.sh start master 38 | regionserver节点上都执行: 39 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start regionserver 40 | HMaster-BackUp也执行: 41 | hbase-daemon.sh --config /home/xxproject/lib/hbase/conf/ start master 42 | 43 | 该步骤暂时屏蔽--有问题: 44 | ```5.启动所有的hbase 45 | 分别启动zk 46 | ./zkServer.sh start 47 | 启动hbase集群 48 | start-dfs.sh 49 | 启动hbase,在主节点上运行: 50 | start-hbase.sh 51 | 6.通过浏览器访问hbase管理页面 52 | 192.168.1.201:60010 53 | 7.为保证集群的可靠性,要启动多个HMaster 54 | hbase-daemon.sh start master 55 | ``` 56 | 57 | 测试一下: 58 | http://10.20.0.12:16010/master-status 59 | http://10.20.0.12:16030/rs-status 60 | 可以杀死master节点看看备节点会不会升主 61 | 62 | hbase shell 试用一下: 63 | create 'mygirls', {NAME => 'base_info', VERSIONS => 3}, {NAME => 'extra_info'} 64 | describe 'mygirls' 65 | put 'mygirls', '0001', 'base_info:name', 'fengjie' 66 | put 'mygirls', '0001', 'base_info:age', '28' 67 | put 'mygirls', '0001', 'base_info:gender', 'feamle' 68 | put 'mygirls', '0001', 'extra_info:boyfriend', 'xiaoming' 69 | get 'mygirls', '0001' 70 | get 'mygirls', '0001', 'base_info' 71 | get 'mygirls', '0001', 'extra_info:boyfriend' 72 | 73 | put 'mygirls', '0001', 'base_info:name', 'fengbaobao' 74 | get 'mygirls', '0001', {COLUMN => 'base_info', VERSIONS => 3} 75 | 76 | get 只能一次一行数据, 返回多行用scan 77 | scan 'mygirls', {COLUMNS => ['base_info'], LIMIT => 10, STARTROW => '0001', VERSIONS => 10} 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /conf_data/hive HQL语法示例.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive HQL语法示例.txt -------------------------------------------------------------------------------- /conf_data/hive-udf.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive-udf.txt -------------------------------------------------------------------------------- /conf_data/hive.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive.txt -------------------------------------------------------------------------------- /conf_data/hive安装-视频.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive安装-视频.txt -------------------------------------------------------------------------------- /conf_data/hive笔记.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/hive笔记.txt -------------------------------------------------------------------------------- /conf_data/kafka安装配置/kafka安装: -------------------------------------------------------------------------------- 1 | kafka笔记 2 | 3 | 4 | 集群安装 5 | 1、解压 6 | 2、修改server.properties 7 | broker.id=1 8 | zookeeper.connect=weekend05:2181,weekend06:2181,weekend07:2181 9 | 10 | 3、将zookeeper集群启动 11 | 12 | 4、在每一台节点上启动broker 13 | bin/kafka-server-start.sh config/server.properties 14 | 15 | 5、在kafka集群中创建一个topic 16 | bin/kafka-topics.sh --create --zookeeper weekend05:2181 --replication-factor 3 --partitions 1 --topic order 17 | 18 | 6、用一个producer向某一个topic中写入消息 19 | bin/kafka-console-producer.sh --broker-list weekend:9092 --topic order 20 | 21 | 7、用一个comsumer从某一个topic中读取信息 22 | bin/kafka-console-consumer.sh --zookeeper weekend05:2181 --from-beginning --topic order 23 | 24 | 8、查看一个topic的分区及副本状态信息 25 | bin/kafka-topics.sh --describe --zookeeper weekend05:2181 --topic order -------------------------------------------------------------------------------- /conf_data/kafka安装配置/kafka笔记.txt: -------------------------------------------------------------------------------- 1 | kafka笔记 2 | 1/kafka是一个分布式的消息缓存系统 3 | 2/kafka集群中的服务器都叫做broker 4 | 3/kafka有两类客户端,一类叫producer(消息生产者),一类叫做consumer(消息消费者),客户端和broker服务器之间采用tcp协议连接 5 | 4/kafka中不同业务系统的消息可以通过topic进行区分,而且每一个消息topic都会被分区,以分担消息读写的负载 6 | 5/每一个分区都可以有多个副本,以防止数据的丢失 7 | 6/某一个分区中的数据如果需要更新,都必须通过该分区所有副本中的leader来更新 8 | 7/消费者可以分组,比如有两个消费者组A和B,共同消费一个topic:order_info,A和B所消费的消息不会重复 9 | 比如 order_info 中有100个消息,每个消息有一个id,编号从0-99,那么,如果A组消费0-49号,B组就消费50-99号 10 | 8/消费者在具体消费某个topic中的消息时,可以指定起始偏移量 11 | 12 | 13 | 14 | 15 | 集群安装 16 | 1、解压 17 | cd /home/xxproject/lib 18 | tar -xzvf kafka_2.11-0.11.0.1.tgz 19 | ln -sf kafka_2.11-0.11.0.1 kafka 20 | 21 | 22 | 修改环境变量 23 | echo ' 24 | # !!!No Modification, This Section is Auto Generated by ZooKeeper 25 | export KAFKA_HOME=/home/xxproject/lib/kafka 26 | export PATH=${PATH}:${KAFKA_HOME}/bin 27 | ' >> ~/.bash_profile 28 | source ~/.bash_profile 29 | 30 | 31 | 2、修改server.properties 32 | =================================================================== 33 | broker.id=1/2/3 34 | zookeeper.connect=node-01:2181,node-02:2181,node-03:2181 35 | =================================================================== 36 | 37 | 3、将zookeeper集群启动 38 | 39 | 4、在每一台节点上启动broker, node-02/3/4上分别启动,指定不同的配置文件 40 | # bin/kafka-server-start.sh config/server-1.properties 41 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-1.properties 42 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-2.properties 43 | kafka-server-start.sh -daemon /home/xxproject/lib/kafka/config/server-3.properties 44 | 45 | 46 | 5、在kafka集群中创建一个topic 47 | # bin/kafka-topics.sh --create --zookeeper weekend05:2181 --replication-factor 3 --partitions 1 --topic order 48 | kafka-topics.sh --create --zookeeper 'node-01:2181,node-02:2181,node-03:2181' --replication-factor 3 --partitions 1 --topic order-r 49 | kafka-topics.sh --list --zookeeper 'node-01:2181,node-02:2181,node-03:2181' 50 | kafka-topics.sh --describe --zookeeper 'node-01:2181,node-02:2181,node-03:2181' 51 | 52 | 6、用一个producer向某一个topic中写入消息 53 | # bin/kafka-console-producer.sh --broker-list weekend:9092 --topic order 54 | kafka-console-producer.sh --broker-list node-02:9092 --topic order 55 | >>> This is a message 56 | >>> This is another message 57 | 58 | kafka-console-producer.sh --broker-list node-02:9092,node-03:9092,node-04:9092 --topic order-r 59 | >>> This is a message 60 | >>> This is another message 61 | 62 | 7、用一个comsumer从某一个topic中读取信息 63 | # bin/kafka-console-consumer.sh --zookeeper weekend05:2181 --from-beginning --topic order 64 | kafka-console-consumer.sh --bootstrap-server node-02:9092 --topic order --from-beginning 65 | kafka-console-consumer.sh --bootstrap-server node-02:9092,node-03:9092,node-04:9092 --topic order-r --from-beginning 66 | 67 | 8、查看一个topic的分区及副本状态信息 68 | # bin/kafka-topics.sh --describe --zookeeper weekend05:2181 --topic order 69 | 70 | 71 | -------------------------------------------------------------------------------- /conf_data/order.txt: -------------------------------------------------------------------------------- 1 | 0000101 iphone6plus 64G 6888 2 | 0000102 xiaominote 64G 2388 3 | 0000103 iphone5 64G 6888 4 | 0000104 xiaomi5 64G 2388 5 | 0000105 huawei 64G 6888 6 | -------------------------------------------------------------------------------- /conf_data/spark安装部署.txt: -------------------------------------------------------------------------------- 1 | Spark参考博客:http://blog.csdn.net/lovehuangjiaju 2 | 3 | 版本:2.2.0 4 | #Pre-build with user-provided Hadoop: "Hadoop free" 版,可应用到任意 Hadoop 版本 5 | #https://www.apache.org/dyn/closer.lua/spark/spark-2.2.0/spark-2.2.0-bin-without-hadoop.tgz 6 | 7 | Pre-built for Apache Hadoop 2.7 and later 8 | https://www.apache.org/dyn/closer.lua/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz 9 | 10 | 11 | 安装Java==1.8.0_144、Scala==2.11.11 12 | tar -xzvf spark-2.2.0-bin-hadoop2.7.tgz 13 | ln -sf spark-2.2.0-bin-hadoop2.7 spark 14 | 15 | 配置环境变量 16 | echo ' 17 | # !!!No Modification, This Section is Auto Generated by Spark 18 | export SPARK_HOME=/home/xxproject/lib/spark 19 | export PATH=${PATH}:${SPARK_HOME}/bin 20 | ' >> ~/.bash_profile 21 | source ~/.bash_profile 22 | 23 | 配置slaves节点 24 | cd spark/conf/ 25 | cp slaves.template slaves 26 | vi slaves 27 | 28 | # localhost 29 | node-01 30 | node-02 31 | node-03 32 | node-04 33 | 34 | 配置spark启动环境变量 35 | cp spark-env.sh.template spark-env.sh 36 | vi spark-env.sh 37 | 38 | export JAVA_HOME=/home/xxproject/lib/jdk 39 | # export SCALA_HOME=/home/xxproject/lib/scala 40 | export SPARK_MASTER_HOST=node-01 41 | export SPARK_MASTER_PORT=7077 42 | # export MASTER=spark://${SPARK_MASTER_HOST}:${SPARK_MASTER_PORT} 43 | export SPARK_WORKER_CORES=1 44 | # export SPARK_WORKER_INSTANCES=1 45 | export SPARK_WORKER_MEMORY=1g 46 | # export HADOOP_CONF_DIR=/opt/hadoop-2.7.3/etc/hadoop 47 | # export HADOOP_HOME=/home/hadoop/package/hadoop-2.7.2/etc/hadoop 48 | # export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath) 49 | 50 | 启动spark集群: 51 | sbin/start-all.sh 52 | 53 | 54 | 查看界面:http://10.20.0.11:8080/ 55 | 测试一下: 56 | cd /home/xxproject/lib/spark 57 | spark-submit --class org.apache.spark.examples.SparkPi --master spark://node-01:7077 --executor-memory 1G --total-executor-cores 1 /home/xxproject/lib/spark/examples/jars/spark-examples_2.11-2.2.0.jar 100 58 | 59 | spark-shell测试: 60 | spark-shell --master spark://node-01:7077 --executor-memory 1G --total-executor-cores 1 61 | 62 | val lines = sc.textFile("file:///home/xxproject/workspace/xxhadoop/spark_data/") 63 | val words = lines.flatMap(line => line.split(" ") ) 64 | val wordCounts = words.map(word => (word, 1)).reduceByKey((a, b) => a + b) 65 | wordCounts.collect().foreach(println) 66 | wordCounts.partitions.length 67 | wordCounts.saveAsTextFile("file:///tmp/output") 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /conf_data/spark运行命令样例.txt: -------------------------------------------------------------------------------- 1 | local单机模式: 2 | 结果xshell可见: 3 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master local[1] ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100 4 | 5 | standalone集群模式: 6 | 需要的配置项 7 | 1, slaves文件 8 | 2, spark-env.sh 9 | export JAVA_HOME=/usr/soft/jdk1.7.0_71 10 | export SPARK_MASTER_IP=spark001 11 | export SPARK_MASTER_PORT=7077 12 | export SPARK_WORKER_CORES=1 13 | export SPARK_WORKER_INSTANCES=1 14 | export SPARK_WORKER_MEMORY=1g 15 | 16 | standalone集群模式: 17 | 之client模式: 18 | 结果xshell可见: 19 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://spark001:7077 --executor-memory 1G --total-executor-cores 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100 20 | 21 | 22 | standalone集群模式: 23 | 之cluster模式: 24 | 结果spark001:8080里面可见! 25 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://spark001:7077 --deploy-mode cluster --supervise --executor-memory 1G --total-executor-cores 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100 26 | 27 | Yarn集群模式: 28 | 需要的配置项 29 | 1, spark-env.sh 30 | export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop 31 | export YARN_CONF_DIR=$HADOOP_INSTALL/etc/hadoop 32 | export SPARK_HOME=/usr/hadoopsoft/spark-1.3.1-bin-hadoop2.4 33 | export SPARK_JAR=/usr/hadoopsoft/spark-1.3.1-bin-hadoop2.4/lib/spark-assembly-1.3.1-hadoop2.4.0.jar 34 | export PATH=$SPARK_HOME/bin:$PATH 35 | 2, ~/.bash_profile 36 | 配置好hadoop环境变量 37 | 38 | Yarn集群模式: 39 | client模式: 40 | 结果xshell可见: 41 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-client --executor-memory 1G --num-executors 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100 42 | 43 | Yarn集群模式: 44 | cluster模式: 45 | 结果spark001:8088里面可见! 46 | ./bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-cluster --executor-memory 1G --num-executors 1 ./lib/spark-examples-1.3.1-hadoop2.4.0.jar 100 47 | 48 | -------------------------------------------------------------------------------- /conf_data/storm安装配置/storm-trainning-v1.0-zs.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/conf_data/storm安装配置/storm-trainning-v1.0-zs.ppt -------------------------------------------------------------------------------- /conf_data/storm安装配置/storm.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | ########### These MUST be filled in for a storm configuration 18 | 19 | storm.zookeeper.servers: 20 | - "node-01" 21 | - "node-02" 22 | - "node-0381" 23 | storm.zookeeper.port: 2181 24 | nimbus.seeds: ["node-01"] 25 | 26 | # ##### These may optionally be filled in: 27 | # 28 | ## List of custom serializations 29 | # topology.kryo.register: 30 | # - org.mycompany.MyType 31 | # - org.mycompany.MyType2: org.mycompany.MyType2Serializer 32 | # 33 | ## List of custom kryo decorators 34 | # topology.kryo.decorators: 35 | # - org.mycompany.MyDecorator 36 | # 37 | ## Locations of the drpc servers 38 | # drpc.servers: 39 | # - "server1" 40 | # - "server2" 41 | 42 | ## Metrics Consumers 43 | ## max.retain.metric.tuples 44 | ## - task queue will be unbounded when max.retain.metric.tuples is equal or less than 0. 45 | ## whitelist / blacklist 46 | ## - when none of configuration for metric filter are specified, it'll be treated as 'pass all'. 47 | ## - you need to specify either whitelist or blacklist, or none of them. You can't specify both of them. 48 | ## - you can specify multiple whitelist / blacklist with regular expression 49 | ## expandMapType: expand metric with map type as value to multiple metrics 50 | ## - set to true when you would like to apply filter to expanded metrics 51 | ## - default value is false which is backward compatible value 52 | ## metricNameSeparator: separator between origin metric name and key of entry from map 53 | ## - only effective when expandMapType is set to true 54 | # topology.metrics.consumer.register: 55 | # - class: "org.apache.storm.metric.LoggingMetricsConsumer" 56 | # max.retain.metric.tuples: 100 57 | # parallelism.hint: 1 58 | # - class: "org.mycompany.MyMetricsConsumer" 59 | # max.retain.metric.tuples: 100 60 | # whitelist: 61 | # - "execute.*" 62 | # - "^__complete-latency$" 63 | # parallelism.hint: 1 64 | # argument: 65 | # - endpoint: "metrics-collector.mycompany.org" 66 | # expandMapType: true 67 | # metricNameSeparator: "." 68 | 69 | ## Cluster Metrics Consumers 70 | # storm.cluster.metrics.consumer.register: 71 | # - class: "org.apache.storm.metric.LoggingClusterMetricsConsumer" 72 | # - class: "org.mycompany.MyMetricsConsumer" 73 | # argument: 74 | # - endpoint: "metrics-collector.mycompany.org" 75 | # 76 | # storm.cluster.metrics.consumer.publish.interval.secs: 60 -------------------------------------------------------------------------------- /conf_data/storm安装配置/storm安装手册及笔记.txt: -------------------------------------------------------------------------------- 1 | 1、安装一个zookeeper集群 2 | 3 | 2、上传storm的安装包,解压 4 | /home/xxproject/lib 5 | tar -xzvf apache-storm-1.1.1.tar.gz 6 | ln -sf apache-storm-1.1.1 storm 7 | 8 | 3、修改配置文件storm.yaml 9 | ====================================================== 10 | storm.zookeeper.servers: 11 | - "node-01" 12 | - "node-02" 13 | - "node-0381" 14 | storm.zookeeper.port: 2181 15 | nimbus.seeds: ["node-01"] 16 | ====================================================== 17 | 18 | #所使用的zookeeper集群主机 19 | storm.zookeeper.servers: 20 | - "weekend05" 21 | - "weekend06" 22 | - "weekend07" 23 | 24 | #nimbus所在的主机名 25 | nimbus.host: "weekend05" 26 | 27 | 28 | 配置环境变量: 29 | echo ' 30 | # !!!No Modification, This Section is Auto Generated by ZooKeeper 31 | export STORM_HOME=/home/xxproject/lib/storm 32 | export PATH=${PATH}:${STORM_HOME}/bin 33 | ' >> ~/.bash_profile 34 | source ~/.bash_profile 35 | 36 | 37 | 最多启动5个Worker进程的意思,默认是4个,暂时不需要调整 38 | supervisor.slots.ports 39 | -6701 40 | -6702 41 | -6703 42 | -6704 43 | -6705 44 | 45 | 启动storm 46 | 在nimbus主机上,node-01节点 47 | nohup storm nimbus 1>/dev/null 2>&1 & 48 | nohup storm ui 1>/dev/null 2>&1 & 49 | 50 | 访问: http://10.20.0.11:8080/index.html 51 | 52 | 在supervisor主机上, node-02/3等两个节点上面都启动 53 | nohup storm supervisor 1>/dev/null 2>&1 & 54 | 55 | 56 | storm的深入学习: 57 | 分布式共享锁的实现 58 | 事务topology的实现机制及开发模式 59 | 在具体场景中的跟其他框架的整合(flume/activeMQ/kafka(分布式的消息队列系统) /redis/hbase/mysql cluster) 60 | 61 | 遗留问题: 62 | 对事物的支持 63 | 64 | 65 | 66 | 67 | 提交任务: 68 | storm jar storm.jar com.xcompany.xproject.storm.TestTopo 69 | storm list 70 | storm kill brandNameTopo 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /conf_data/udf.txt: -------------------------------------------------------------------------------- 1 | 1389990045,http://www.163.com,2000 2 | 1385566005,http://www.163.com,2000 3 | 1385566005,http://www.163.com,2000 4 | 1389990045,http://www.163.com,2000 5 | 1390876045,http://www.163.com,2000 6 | 1385566005,http://www.163.com,2000 7 | 1390876045,http://www.163.com,2000 8 | 1390876045,http://www.163.com,2000 9 | 1389990045,http://www.163.com,2000 10 | 11 | select myfunction(nbr),url,flow from t_flow; 12 | 13 | 14 | 1389990045 beijing http://www.163.com 2000 15 | 1385566005,beijing http://www.163.com 2000 16 | 1385566005,beijing http://www.163.com 2000 17 | 1389990045,tianjing,http://www.163.com,2000 18 | 1390876045,tianjing,http://www.163.com,2000 19 | 1385566005,tianjing,http://www.163.com,2000 20 | 1390876045,beijing,http://www.163.com,2000 21 | 1390876045,nanjing,http://www.163.com,2000 22 | 1389990045,nanjing,http://www.163.com,2000 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /conf_data/udt.test.txt: -------------------------------------------------------------------------------- 1 | 1389990045,http://www.163.com,2000 2 | 1385566005,http://www.163.com,2000 3 | 1385566005,http://www.163.com,2000 4 | 1389990045,http://www.163.com,2000 5 | 1390876045,http://www.163.com,2000 6 | 1385566005,http://www.163.com,2000 7 | 1390876045,http://www.163.com,2000 8 | 1390876045,http://www.163.com,2000 9 | 1389990045,http://www.163.com,2000 10 | -------------------------------------------------------------------------------- /conf_data/word-count.txt: -------------------------------------------------------------------------------- 1 | hello world 2 | hello tom 3 | hello jim 4 | hello kitty 5 | hello baby 6 | -------------------------------------------------------------------------------- /conf_data/zoo.cfg: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | # do not use /tmp for storage, /tmp here is just 11 | # example sakes. 12 | # dataDir=/tmp/zookeeper 13 | dataDir=/home/xxproject/data/zookeeper 14 | # the port at which the clients will connect 15 | clientPort=2181 16 | # the maximum number of client connections. 17 | # increase this if you need to handle more clients 18 | #maxClientCnxns=60 19 | # 20 | # Be sure to read the maintenance section of the 21 | # administrator guide before turning on autopurge. 22 | # 23 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 24 | # 25 | # The number of snapshots to retain in dataDir 26 | #autopurge.snapRetainCount=3 27 | # Purge task interval in hours 28 | # Set to "0" to disable auto purge feature 29 | #autopurge.purgeInterval=1 30 | 31 | # Added By ZooKeeper 32 | server.1=node-01:2888:3888 33 | server.2=node-02:2888:3888 34 | server.3=node-03:2888:3888 35 | 36 | -------------------------------------------------------------------------------- /conf_data/zoo_sample.cfg: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | # do not use /tmp for storage, /tmp here is just 11 | # example sakes. 12 | dataDir=/tmp/zookeeper 13 | # the port at which the clients will connect 14 | clientPort=2181 15 | # the maximum number of client connections. 16 | # increase this if you need to handle more clients 17 | #maxClientCnxns=60 18 | # 19 | # Be sure to read the maintenance section of the 20 | # administrator guide before turning on autopurge. 21 | # 22 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 23 | # 24 | # The number of snapshots to retain in dataDir 25 | #autopurge.snapRetainCount=3 26 | # Purge task interval in hours 27 | # Set to "0" to disable auto purge feature 28 | #autopurge.purgeInterval=1 29 | -------------------------------------------------------------------------------- /conf_data/非HA场景/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | fs.defaultFS 23 | hdfs://node-01:9000 24 | The name of the default file system. 25 | 26 | 27 | 28 | hadoop.tmp.dir 29 | /home/xxproject/data/hadoop/tmp 30 | A base for other temporary directories. 31 | 32 | 33 | -------------------------------------------------------------------------------- /conf_data/非HA场景/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | dfs.namenode.secondary.http-address 23 | node-02:50090 24 | The secondary namenode http server address and port. 25 | 26 | 27 | dfs.namenode.secondary.https-address 28 | node-02:50091 29 | The secondary namenode HTTPS server address and port. 30 | 31 | 32 | 33 | dfs.namenode.http-address 34 | node-01:50070 35 | The address and the base port where the dfs namenode web ui will listen on. 36 | 37 | 38 | 39 | dfs.replication 40 | 3 41 | 42 | 43 | -------------------------------------------------------------------------------- /conf_data/非HA场景/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | mapreduce.framework.name 23 | yarn 24 | The runtime framework for executing MapReduce jobs. Can be one of local, classic or yarn. 25 | 26 | 27 | 28 | 29 | yarn.app.mapreduce.am.resource.mb 30 | 1536 31 | The amount of memory the MR AppMaster needs. 32 | 33 | 34 | yarn.app.mapreduce.am.resource.cpu-vcores 35 | 1 36 | The number of virtual CPU cores the MR AppMaster needs. 37 | 38 | 39 | 40 | 44 | 45 | -------------------------------------------------------------------------------- /conf_data/非HA场景/masters: -------------------------------------------------------------------------------- 1 | node-02 2 | -------------------------------------------------------------------------------- /conf_data/非HA场景/slaves: -------------------------------------------------------------------------------- 1 | node-02 2 | node-03 3 | node-04 4 | 5 | -------------------------------------------------------------------------------- /conf_data/非HA场景/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 19 | yarn.resourcemanager.hostname 20 | node-01 21 | 22 | 23 | 24 | 25 | yarn.resourcemanager.webapp.address 26 | node-01:8088 27 | 28 | 29 | 30 | 31 | yarn.resourcemanager.webapp.https.address 32 | node-01:8090 33 | 34 | 35 | 36 | 37 | yarn.nodemanager.aux-services 38 | mapreduce_shuffle 39 | 40 | 41 | 42 | 43 | yarn.nodemanager.resource.memory-mb 44 | 1536 45 | 46 | 47 | yarn.nodemanager.resource.cpu-vcores 48 | 1 49 | 50 | 51 | 52 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /data_analyze.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/data_analyze.jpg -------------------------------------------------------------------------------- /data_analyze.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/data_analyze.png -------------------------------------------------------------------------------- /hadoop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/hadoop.jpg -------------------------------------------------------------------------------- /hbase/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /hbase/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /hbase/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | hbase 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /hbase/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 3 | org.eclipse.jdt.core.compiler.compliance=1.5 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.5 6 | -------------------------------------------------------------------------------- /hbase/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /hbase/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | com.xcompany.xproject 9 | hadoop 10 | 1.0.0-RELEASE 11 | 12 | 13 | hbase 14 | 15 | 16 | 18 | 19 | log4j 20 | log4j 21 | 1.2.17 22 | 23 | 24 | org.apache.hbase 25 | hbase-client 26 | 1.2.6 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /hbase/src/main/java/com/xcompany/xproject/hbase/App.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.hbase; 2 | 3 | /** 4 | * Hello world! 5 | * 6 | */ 7 | public class App 8 | { 9 | public static void main( String[] args ) 10 | { 11 | System.out.println( "Hello World!" ); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /hbase/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ### direct log messages to stdout ### 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target = System.out 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 6 | 7 | ### direct messages to file test.log ### 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender 9 | log4j.appender.file.File= ./log/hive.log 10 | log4j.appender.file.Append = true 11 | log4j.appender.file.MaxFileSize = 1MB 12 | log4j.appender.file.MaxBackupIndex = 10 13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout 14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 15 | 16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.dfile.File = ./logs/hive.log 18 | log4j.appender.dfile.Append = true 19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout 20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 21 | 22 | ### set log levels - for more verbose logging change 'info' to 'debug' ### 23 | 24 | #log4j.logger.org.app=debug 25 | #log4j.logger.com.ares=debug, stdout, file, dfile 26 | #log4j.logger.com.xcloud=debug, stdout 27 | #log4j.additivity.com.ares=false 28 | 29 | # log4j.rootLogger=info, stdout 30 | log4j.rootLogger=info, stdout, file, dfile 31 | -------------------------------------------------------------------------------- /hbase/src/test/java/com/xcompany/xproject/hbase/AppTest.java: -------------------------------------------------------------------------------- 1 | //package com.xcompany.xproject.hbase; 2 | // 3 | //import junit.framework.Test; 4 | //import junit.framework.TestCase; 5 | //import junit.framework.TestSuite; 6 | // 7 | ///** 8 | // * Unit test for simple App. 9 | // */ 10 | //public class AppTest 11 | // extends TestCase 12 | //{ 13 | // /** 14 | // * Create the test case 15 | // * 16 | // * @param testName name of the test case 17 | // */ 18 | // public AppTest( String testName ) 19 | // { 20 | // super( testName ); 21 | // } 22 | // 23 | // /** 24 | // * @return the suite of tests being tested 25 | // */ 26 | // public static Test suite() 27 | // { 28 | // return new TestSuite( AppTest.class ); 29 | // } 30 | // 31 | // /** 32 | // * Rigourous Test :-) 33 | // */ 34 | // public void testApp() 35 | // { 36 | // assertTrue( true ); 37 | // } 38 | //} 39 | -------------------------------------------------------------------------------- /hdfs/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /hdfs/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /hdfs/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | hdfs 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /hdfs/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 3 | org.eclipse.jdt.core.compiler.compliance=1.5 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.5 6 | -------------------------------------------------------------------------------- /hdfs/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /hdfs/dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | hadoop 5 | com.xcompany.xproject 6 | 1.0.0-RELEASE 7 | 8 | 4.0.0 9 | hdfs 10 | hdfs 11 | 12 | 13 | junit 14 | junit 15 | 4.12 16 | test 17 | 18 | 19 | hamcrest-core 20 | org.hamcrest 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /hdfs/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | com.xcompany.xproject 9 | hadoop 10 | 1.0.0-RELEASE 11 | 12 | 13 | hdfs 14 | hdfs 15 | 16 | 17 | 18 | org.apache.hadoop 19 | hadoop-client 20 | 2.7.4 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /hdfs/src/main/java/com/xcompany/xproject/hdfs/App.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.hdfs; 2 | 3 | /** 4 | * Hello world! 5 | * 6 | */ 7 | public class App 8 | { 9 | public static void main( String[] args ) 10 | { 11 | System.out.println( "Hello World!" ); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /hdfs/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ### direct log messages to stdout ### 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target = System.out 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 6 | 7 | ### direct messages to file test.log ### 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender 9 | log4j.appender.file.File= ./log/hdfs.log 10 | log4j.appender.file.Append = true 11 | log4j.appender.file.MaxFileSize = 1MB 12 | log4j.appender.file.MaxBackupIndex = 10 13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout 14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 15 | 16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.dfile.File = ./logs/hdfs.log 18 | log4j.appender.dfile.Append = true 19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout 20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 21 | 22 | ### set log levels - for more verbose logging change 'info' to 'debug' ### 23 | 24 | #log4j.logger.org.app=debug 25 | #log4j.logger.com.ares=debug, stdout, file, dfile 26 | #log4j.logger.com.xcloud=debug, stdout 27 | #log4j.additivity.com.ares=false 28 | 29 | # log4j.rootLogger=info, stdout 30 | log4j.rootLogger=info, stdout, file, dfile 31 | -------------------------------------------------------------------------------- /hdfs/src/test/java/com/xcompany/xproject/hdfs/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.hdfs; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /hdfs/src/test/java/com/xcompany/xproject/hdfs/HDFSTest.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.hdfs; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | 8 | import org.apache.commons.io.IOUtils; 9 | import org.apache.hadoop.conf.Configuration; 10 | import org.apache.hadoop.fs.FSDataInputStream; 11 | import org.apache.hadoop.fs.FSDataOutputStream; 12 | import org.apache.hadoop.fs.FileSystem; 13 | import org.apache.hadoop.fs.LocatedFileStatus; 14 | import org.apache.hadoop.fs.Path; 15 | import org.apache.hadoop.fs.RemoteIterator; 16 | import org.junit.After; 17 | import org.junit.Before; 18 | import org.junit.Test; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | public class HDFSTest { 23 | 24 | private static final Logger LOGGER = LoggerFactory.getLogger(HDFSTest.class); 25 | private FileSystem fs = null; 26 | 27 | @Before 28 | public void setUp() throws IOException { 29 | Configuration conf = new Configuration(); 30 | conf.set("fs.defaultFS", "hdfs://node-01:9000"); 31 | fs = FileSystem.get(conf); 32 | } 33 | 34 | @After 35 | public void tearDown() throws IOException { 36 | fs.close(); 37 | } 38 | 39 | @Test 40 | public void testList() throws FileNotFoundException, IOException { 41 | Path f = new Path("/"); 42 | RemoteIterator files = fs.listFiles(f, true); 43 | while (files.hasNext()) { 44 | LocatedFileStatus file = (LocatedFileStatus) files.next(); 45 | LOGGER.info("====={}", file.getPath()); 46 | } 47 | } 48 | 49 | @Test 50 | public void testPut() throws IOException { 51 | Path f = new Path("/put-word-count.txt"); 52 | FSDataOutputStream fsDataOutputStream = fs.create(f, true); 53 | FileInputStream fileInputStream = new FileInputStream("/home/xxproject/word-count.txt"); 54 | IOUtils.copy(fileInputStream, fsDataOutputStream); 55 | } 56 | 57 | @Test 58 | public void testGet() throws IOException { 59 | Path f = new Path("/put/word-count.txt"); 60 | FSDataInputStream fsDataInputStream = fs.open(f); 61 | FileOutputStream fileOutputStream = new FileOutputStream("/home/xxproject/get-word-count.txt"); 62 | IOUtils.copy(fsDataInputStream, fileOutputStream); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /hive/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /hive/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /hive/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | hive 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /hive/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 3 | org.eclipse.jdt.core.compiler.compliance=1.5 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.5 6 | -------------------------------------------------------------------------------- /hive/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /hive/dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | hadoop 5 | com.xcompany.xproject 6 | 1.0.0-RELEASE 7 | 8 | 4.0.0 9 | hive 10 | hive 11 | 12 | ${project.artifactId}-${project.version} 13 | 14 | 15 | maven-shade-plugin 16 | 2.2 17 | 18 | 19 | package 20 | 21 | shade 22 | 23 | 24 | 25 | 26 | *:* 27 | 28 | META-INF/*.SF 29 | META-INF/*.DSA 30 | META-INF/*.RSA 31 | META-INF/MANIFEST.MF 32 | META-INF/log4j-provider.properties 33 | 34 | 35 | 36 | 37 | 38 | META-INF/spring.handlers 39 | 40 | 41 | com.xcompany.xproject.hive.Phone2Area 42 | 43 | 44 | META-INF/spring.schemas 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | junit 56 | junit 57 | 4.12 58 | test 59 | 60 | 61 | hamcrest-core 62 | org.hamcrest 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /hive/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | com.xcompany.xproject 9 | hadoop 10 | 1.0.0-RELEASE 11 | 12 | 13 | hive 14 | 15 | 16 | 18 | 19 | 20 | org.apache.hive 21 | hive-exec 22 | 2.1.1 23 | 24 | 25 | log4j 26 | log4j 27 | 1.2.17 28 | 29 | 30 | 32 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | org.apache.maven.plugins 41 | maven-shade-plugin 42 | 2.2 43 | 44 | 45 | package 46 | 47 | shade 48 | 49 | 50 | 51 | 52 | *:* 53 | 54 | META-INF/*.SF 55 | META-INF/*.DSA 56 | META-INF/*.RSA 57 | META-INF/MANIFEST.MF 58 | META-INF/log4j-provider.properties 59 | 60 | 61 | 62 | 63 | 64 | 66 | META-INF/spring.handlers 67 | 68 | 70 | com.xcompany.xproject.hive.Phone2Area 71 | 72 | 74 | META-INF/spring.schemas 75 | 76 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | ${project.artifactId}-${project.version} 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /hive/src/main/java/com/xcompany/xproject/hive/Phone2Area.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.hive; 2 | 3 | import java.util.HashMap; 4 | 5 | import org.apache.hadoop.hive.ql.exec.UDF; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | /* 10 | * mvn clean --projects=com.xcompany.xproject:hive 11 | * mvn install --projects=com.xcompany.xproject:hive 12 | * java -jar hive/target/hive-1.0.0-RELEASE.jar 13 | */ 14 | public class Phone2Area extends UDF { 15 | 16 | private static final Logger LOGGER = LoggerFactory.getLogger(Phone2Area.class); 17 | 18 | // Load Once, Speed Up 19 | private static HashMap areaMap = new HashMap(); 20 | 21 | private static void loadData() { 22 | areaMap.put("135", "beijing"); 23 | areaMap.put("136", "shanghai"); 24 | areaMap.put("137", "xian"); 25 | areaMap.put("138", "wuhan"); 26 | } 27 | 28 | static { 29 | // System.setProperty("log4j2.loggerContextFactory", "org.apache.logging.log4j.core.impl.Log4jContextFactory"); 30 | loadData(); 31 | } 32 | 33 | public String evaluate(String phoneNum) { 34 | String preKey = phoneNum.substring(0,3); 35 | return (areaMap.get(preKey) == null) ? "other" : areaMap.get(preKey); 36 | } 37 | 38 | public static void main(String[] args) { 39 | Phone2Area phone2Area = new Phone2Area(); 40 | LOGGER.error(phone2Area.evaluate("18665817689")); 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /hive/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ### direct log messages to stdout ### 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target = System.out 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 6 | 7 | ### direct messages to file test.log ### 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender 9 | log4j.appender.file.File= ./log/hive.log 10 | log4j.appender.file.Append = true 11 | log4j.appender.file.MaxFileSize = 1MB 12 | log4j.appender.file.MaxBackupIndex = 10 13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout 14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 15 | 16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.dfile.File = ./logs/hive.log 18 | log4j.appender.dfile.Append = true 19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout 20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 21 | 22 | ### set log levels - for more verbose logging change 'info' to 'debug' ### 23 | 24 | #log4j.logger.org.app=debug 25 | #log4j.logger.com.ares=debug, stdout, file, dfile 26 | #log4j.logger.com.xcloud=debug, stdout 27 | #log4j.additivity.com.ares=false 28 | 29 | # log4j.rootLogger=info, stdout 30 | log4j.rootLogger=info, stdout, file, dfile 31 | -------------------------------------------------------------------------------- /hive/src/test/java/com/xcompany/xproject/hive/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.hive; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /kafka/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /kafka/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /kafka/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | kafka 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /kafka/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 3 | org.eclipse.jdt.core.compiler.compliance=1.5 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.5 6 | -------------------------------------------------------------------------------- /kafka/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | com.xcompany.xproject 9 | hadoop 10 | 1.0.0-RELEASE 11 | 12 | 13 | kafka 14 | 15 | 16 | 17 | 19 | 20 | 21 | org.apache.kafka 22 | kafka_2.11 23 | 0.11.0.1 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /kafka/src/main/java/com/xcompany/xproject/kafka/TestConsumer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.kafka; 2 | 3 | import java.util.Arrays; 4 | import java.util.Properties; 5 | 6 | import org.apache.kafka.clients.consumer.Consumer; 7 | import org.apache.kafka.clients.consumer.ConsumerRecord; 8 | import org.apache.kafka.clients.consumer.ConsumerRecords; 9 | import org.apache.kafka.clients.consumer.KafkaConsumer; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | public class TestConsumer { 14 | 15 | private static final Logger LOGGER = LoggerFactory.getLogger(TestConsumer.class); 16 | 17 | public static void main(String[] args) { 18 | Properties properties = new Properties(); 19 | // bin/kafka-topics.sh 20 | properties.put("zookeeper.connect", "node-01:2181,node-02:2181,node-03:2181"); 21 | // kafka-console-producer.sh 22 | properties.put("metadata.broker.list", "node-02:9092,node-03:9092,node-04:9092"); 23 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 24 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 25 | // kafka-console-consumer.sh 26 | properties.put("bootstrap.servers", "node-02:9092,node-03:9092,node-04:9092"); 27 | 28 | // must sepc group.id 29 | properties.put("group.id", "test-group-new"); 30 | properties.put("auto.offset.reset", "earliest"); 31 | 32 | Consumer consumer = new KafkaConsumer(properties); 33 | consumer.subscribe(Arrays.asList("order-r")); 34 | try { 35 | while (true) { 36 | ConsumerRecords records = consumer.poll(1000); // ms 37 | for (ConsumerRecord record : records) { 38 | LOGGER.info("offset = {}, key = {}, value = {}\n", record.offset(), record.key(), record.value()); 39 | } 40 | } 41 | } catch (Exception e) { 42 | } finally { 43 | consumer.close(); 44 | } 45 | 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /kafka/src/main/java/com/xcompany/xproject/kafka/TestProducer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.kafka; 2 | 3 | import java.util.Properties; 4 | 5 | import org.apache.kafka.clients.producer.KafkaProducer; 6 | import org.apache.kafka.clients.producer.Producer; 7 | import org.apache.kafka.clients.producer.ProducerRecord; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | 12 | public class TestProducer { 13 | 14 | private static final Logger LOGGER = LoggerFactory.getLogger(TestProducer.class); 15 | 16 | public static void main(String[] args) { 17 | Properties properties = new Properties(); 18 | // bin/kafka-topics.sh 19 | properties.put("zookeeper.connect", "node-01:2181,node-02:2181,node-03:2181"); 20 | // kafka-console-producer.sh 21 | properties.put("metadata.broker.list", "node-02:9092,node-03:9092,node-04:9092"); 22 | properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 23 | properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 24 | // kafka-console-consumer.sh 25 | properties.put("bootstrap.servers", "node-02:9092,node-03:9092,node-04:9092"); 26 | 27 | 28 | Producer producer = new KafkaProducer(properties); 29 | 30 | LOGGER.info("roduce start..."); 31 | for (int i = 0; i < 100; i++) { 32 | ProducerRecord msg = new ProducerRecord("order-r", "name", "Hello_XXX_" + i); 33 | producer.send(msg); 34 | } 35 | producer.close(); 36 | LOGGER.info("produce end..."); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /kafka/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ### direct log messages to stdout ### 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target = System.out 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 6 | 7 | ### direct messages to file test.log ### 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender 9 | log4j.appender.file.File= ./log/hive.log 10 | log4j.appender.file.Append = true 11 | log4j.appender.file.MaxFileSize = 1MB 12 | log4j.appender.file.MaxBackupIndex = 10 13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout 14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 15 | 16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.dfile.File = ./logs/hive.log 18 | log4j.appender.dfile.Append = true 19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout 20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 21 | 22 | ### set log levels - for more verbose logging change 'info' to 'debug' ### 23 | 24 | #log4j.logger.org.app=debug 25 | #log4j.logger.com.ares=debug, stdout, file, dfile 26 | #log4j.logger.com.xcloud=debug, stdout 27 | #log4j.additivity.com.ares=false 28 | 29 | # log4j.rootLogger=info, stdout 30 | log4j.rootLogger=info, stdout, file, dfile 31 | -------------------------------------------------------------------------------- /mmdetection/1-mmdection安装使用记录.txt: -------------------------------------------------------------------------------- 1 | # 0、mmdection各组件依赖版本 2 | # 参考:https://mmdetection.readthedocs.io/en/latest/get_started.html 3 | Linux or macOS (Windows is in experimental support) 4 | Python 3.6+:3.7.4 5 | PyTorch 1.3+:1.4 6 | CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible):10.1 7 | GCC 5+ 8 | MMCV 9 | 10 | # 1、参考:https://phoenixnap.com/kb/how-to-install-anaconda-ubuntu-18-04-or-20-04 11 | # curl –O https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh 12 | wget https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh 13 | bash Anaconda3-2020.02-Linux-x86_64.sh 14 | 15 | # 2、Create a conda virtual environment and activate it 16 | conda create -n open-mmlab python=3.7.4 -y 17 | conda activate open-mmlab 18 | 19 | # 3、Install PyTorch and torchvision following the official instructions 20 | # https://pytorch.org/get-started/locally/#windows-pip 21 | conda install pytorch=1.6.0 cudatoolkit=10.1 torchvision==0.7.0 -c pytorch -y 22 | 23 | import torch 24 | x = torch.rand(5, 3) 25 | print(x) 26 | import torch 27 | torch.cuda.is_available() 28 | 29 | # 4、Install mmcv-full, we recommend you to install the pre-build package as below 30 | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html 31 | # pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html 32 | 33 | #5、Clone the MMDetection repository. 34 | sudo apt-get -y install build-essential nghttp2 libnghttp2-dev libssl-dev 35 | git clone https://github.com/open-mmlab/mmdetection.git 36 | cd mmdetection 37 | 或者直接下载:wget https://github.com/open-mmlab/mmdetection/archive/v2.10.0.zip 38 | unzip mmdetection-2.10.0.zip 39 | mv mmdetection-2.10.0 mmdetection 40 | 41 | # 6、Install build requirements and then install MMDetection. 42 | pip install -r requirements/build.txt 43 | pip install -v -e . # or "python setup.py develop" 44 | 45 | # 7、通过Docker镜像安装使用 46 | # 镜像仓库:https://hub.docker.com/search?q=mmdetection&type=image 47 | # We provide a Dockerfile to build an image. Ensure that you are using docker version >=19.03. 48 | # build an image with PyTorch 1.6, CUDA 10.1 49 | docker build -t mmdetection docker/ 50 | docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmdetection/data mmdetection 51 | 52 | # 8、验证环境是否安装成功 53 | import torch 54 | available_gpus = [torch.cuda.get_device_properties(i) for i in range(torch.cuda.device_count())] 55 | available_gpus 56 | 57 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 58 | x = torch.tensor([1, 2, 3], device=device) 59 | print(x) 60 | 61 | 62 | from mmdet.apis import init_detector, inference_detector 63 | config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' 64 | # download the checkpoint from model zoo and put it in `checkpoints/` 65 | # url: http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth 66 | checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' 67 | device = 'cuda:0' 68 | # init a detector 69 | model = init_detector(config_file, checkpoint_file, device=device) 70 | # inference the demo image 71 | inference_detector(model, 'demo/demo.jpg') 72 | 73 | # 8、Azure的GPU服务器规格 74 | 规格:Standard_NC6s_v3 75 | CPU:6核,内存:112G 76 | GPU:1卡,显存:16G 77 | 78 | # nvidia-smi报错的问题 79 | dkms status 80 | sudo apt-get install dkms 81 | sudo dkms install -m nvidia -v 410.78 82 | nvidia-smi -------------------------------------------------------------------------------- /mmdetection/2-mmdection预测新数据.txt: -------------------------------------------------------------------------------- 1 | # notebook安装新内核 2 | # 参考文档:https://docs.microsoft.com/zh-cn/azure/machine-learning/how-to-run-jupyter-notebooks 3 | conda install pip -y 4 | conda install notebook ipykernel -y 5 | python -m ipykernel install --user --name open-mmlab --display-name "Python (open-mmlab)" 6 | 7 | # 下载预训练的模型参数 8 | cd mmdetection 9 | mkdir -p checkpoints/ 10 | wget http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth 11 | conda activate open-mmlab 12 | 13 | # 执行代码 14 | from mmdet.apis import init_detector, inference_detector 15 | import mmcv 16 | 17 | # Specify the path to model config and checkpoint file 18 | config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' 19 | checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' 20 | 21 | # build the model from a config file and a checkpoint file 22 | model = init_detector(config_file, checkpoint_file, device='cuda:0') 23 | 24 | # test a single image and show the results 25 | img = 'demo/demo.jpg' # or img = mmcv.imread(img), which will only load it once 26 | result = inference_detector(model, img) 27 | # visualize the results in a new window 28 | model.show_result(img, result) 29 | # or save the visualization results to image files 30 | model.show_result(img, result, out_file='result.jpg') 31 | 32 | # test a video and show the results 33 | video = mmcv.VideoReader('demo/demo.mp4') 34 | for frame in video: 35 | result = inference_detector(model, frame) 36 | model.show_result(frame, result, wait_time=1) 37 | 38 | # AML的notrbook CPU版本 39 | from mmdet.apis import init_detector, inference_detector, show_result_pyplot 40 | import mmcv 41 | 42 | # Specify the path to model config and checkpoint file 43 | config_file = 'mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' 44 | checkpoint_file = 'mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' 45 | 46 | # build the model from a config file and a checkpoint file 47 | # model = init_detector(config_file, checkpoint_file, device='cuda:0') 48 | model = init_detector(config_file, checkpoint_file, device='cpu') # 默认GPU改写成CPU 49 | 50 | 51 | 52 | # test a single image and show the results 53 | img = 'mmdetection/demo/demo.jpg' # or img = mmcv.imread(img), which will only load it once 54 | result = inference_detector(model, img) 55 | # visualize the results in a new window 56 | model.show_result(img, result) 57 | # or save the visualization results to image files 58 | model.show_result(img, result, out_file='mmdetection/result/result.jpg') 59 | 60 | # show the results 61 | show_result_pyplot(model, img, result) 62 | 63 | 64 | # test a video and show the results 65 | video = mmcv.VideoReader('mmdetection/demo/demo.mp4') 66 | total_frame = 0 67 | for frame in video: 68 | result = inference_detector(model, frame) 69 | # show the results 70 | show_result_pyplot(model, frame, result) 71 | model.show_result(frame, result, wait_time=1) 72 | total_frame += 1 73 | print(total_frame) 74 | 75 | 76 | !cd mmdetection \ 77 | && /anaconda/envs/open-mmlab/bin/python demo/image_demo.py \ 78 | demo/demo.jpg \ 79 | configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py \ 80 | checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \ 81 | --device cpu 82 | -------------------------------------------------------------------------------- /mmdetection/3-mmdection模型指标测试.txt: -------------------------------------------------------------------------------- 1 | # VOC 数据集下载(使用2007年数据集) 2 | # https://pjreddie.com/projects/pascal-voc-dataset-mirror/ 3 | # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html 4 | # https://cocodataset.org/ 5 | cd mmdetection 6 | mkdir data 7 | 8 | 9 | # 下载模型权重 10 | # https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc 11 | cd mmdetection 12 | mkdir -p checkpoints/ 13 | wget http://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth 14 | 15 | # Test Faster R-CNN on PASCAL VOC (without saving the test results) and evaluate the mAP. 16 | # Config and checkpoint files are available here. 17 | !cd mmdetection \ 18 | && /anaconda/envs/open-mmlab/bin/python tools/test.py \ 19 | configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py \ 20 | checkpoints/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth \ 21 | --show-dir faster_rcnn_r50_fpn_1x_results/ \ 22 | --eval mAP recall 23 | 24 | 25 | 快速删除大文件夹、大文件 26 | mkdir -p blank 27 | rsync --delete-before -d blank/ VOCdevkit/ 28 | 29 | -------------------------------------------------------------------------------- /mmdetection/README: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mr/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /mr/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /mr/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | mr 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /mr/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 3 | org.eclipse.jdt.core.compiler.compliance=1.5 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.5 6 | -------------------------------------------------------------------------------- /mr/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /mr/dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | hadoop 5 | com.xcompany.xproject 6 | 1.0.0-RELEASE 7 | 8 | 4.0.0 9 | mr 10 | jar 11 | 12 | 13 | junit 14 | junit 15 | 4.12 16 | test 17 | 18 | 19 | hamcrest-core 20 | org.hamcrest 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /mr/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | com.xcompany.xproject 9 | hadoop 10 | 1.0.0-RELEASE 11 | 12 | 13 | mr 14 | jar 15 | 16 | 17 | 18 | org.apache.hadoop 19 | hadoop-client 20 | 2.7.4 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/App.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr; 2 | 3 | /** 4 | * Hello world! 5 | * 6 | */ 7 | public class App 8 | { 9 | public static void main( String[] args ) 10 | { 11 | System.out.println( "Hello World!" ); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowBean.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowpartition; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import java.io.IOException; 6 | 7 | import org.apache.hadoop.io.Writable; 8 | 9 | public class FlowBean implements Writable { 10 | 11 | private String phoneNum; 12 | private long upFlow; 13 | private long downFlow; 14 | private long sumFlow; 15 | 16 | 17 | public String getPhoneNum() { 18 | return phoneNum; 19 | } 20 | public void setPhoneNum(String phoneNum) { 21 | this.phoneNum = phoneNum; 22 | } 23 | public long getUpFlow() { 24 | return upFlow; 25 | } 26 | public void setUpFlow(long upFlow) { 27 | this.upFlow = upFlow; 28 | } 29 | public long getDownFlow() { 30 | return downFlow; 31 | } 32 | public void setDownFlow(long downFlow) { 33 | this.downFlow = downFlow; 34 | } 35 | public long getSumFlow() { 36 | return sumFlow; 37 | } 38 | public void setSumFlow(long sumFlow) { 39 | this.sumFlow = sumFlow; 40 | } 41 | 42 | // @Override 43 | // public String toString() { 44 | // return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow 45 | // + ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]"; 46 | // } 47 | @Override 48 | public String toString() { 49 | return upFlow + "\t" + downFlow + "\t" + sumFlow; 50 | } 51 | 52 | public void write(DataOutput out) throws IOException { 53 | out.writeUTF(phoneNum); 54 | out.writeLong(upFlow); 55 | out.writeLong(downFlow); 56 | out.writeLong(sumFlow); 57 | } 58 | public void readFields(DataInput in) throws IOException { 59 | phoneNum = in.readUTF(); 60 | upFlow = in.readLong(); 61 | downFlow = in.readLong(); 62 | sumFlow = in.readLong(); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartition.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowpartition; 2 | 3 | import java.util.HashMap; 4 | 5 | import org.apache.hadoop.mapreduce.Partitioner; 6 | 7 | 8 | public class FlowPartition extends Partitioner{ 9 | 10 | // Load Once, Speed Up 11 | private static HashMap partitionMap = new HashMap(); 12 | private static void loadData() { 13 | partitionMap.put("135", 0); 14 | partitionMap.put("136", 1); 15 | partitionMap.put("137", 2); 16 | partitionMap.put("138", 3); 17 | } 18 | static { 19 | loadData(); 20 | } 21 | 22 | @Override 23 | public int getPartition(K key, V value, int numPartitions) { 24 | //return 0; 25 | String preKey = key.toString().substring(0,3); 26 | return (partitionMap.get(preKey) == null) ? 4 : partitionMap.get(preKey); 27 | } 28 | } 29 | 30 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionJob.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowpartition; 2 | 3 | import java.util.Date; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.conf.Configured; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Job; 10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 12 | import org.apache.hadoop.util.GenericOptionsParser; 13 | import org.apache.hadoop.util.Tool; 14 | import org.apache.hadoop.util.ToolRunner; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | public class FlowPartitionJob extends Configured implements Tool { 19 | 20 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowPartitionJob.class); 21 | 22 | public static void main(String[] args) throws Exception { 23 | 24 | Date startTime = new Date(); 25 | LOGGER.info("==========job started: " + startTime); 26 | int res = ToolRunner.run(new Configuration(), new FlowPartitionJob(), args); 27 | Date endTime = new Date(); 28 | LOGGER.info("==========job ended: " + endTime); 29 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds"); 30 | System.exit(res); 31 | } 32 | 33 | public int run(String[] args) throws Exception { 34 | 35 | /*Configuration conf = getConf(); 36 | JobClient client = new JobClient(conf); 37 | ClusterStatus cluster = client.getClusterStatus(); 38 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); 39 | String join_reduces = conf.get(REDUCES_PER_HOST); 40 | if (join_reduces != null) { 41 | num_reduces = cluster.getTaskTrackers() * 42 | Integer.parseInt(join_reduces); 43 | } 44 | // Set user-supplied (possibly default) job configs 45 | job.setNumReduceTasks(num_reduces);*/ 46 | 47 | 48 | Configuration conf = new Configuration(); 49 | //conf.set("fs.defaultFS", "hdfs://node-01:9000"); 50 | String[] otherArgs = new GenericOptionsParser(conf, args) 51 | .getRemainingArgs(); 52 | 53 | String commaSeparatedPaths = null; 54 | String outputDir = null; 55 | if (otherArgs.length == 2) { 56 | commaSeparatedPaths = otherArgs[0]; 57 | outputDir = otherArgs[1]; 58 | } else { 59 | System.err.println("Usage: [,...] "); 60 | //System.exit(-1); 61 | return -1; 62 | } 63 | 64 | 65 | Job job = Job.getInstance(conf); 66 | job.setJobName("FlowPartitionJob"); 67 | job.setJarByClass(FlowPartitionJob.class); 68 | 69 | // job.setInputFormatClass(TextInputFormat.class); 70 | // job.setOutputFormatClass(TextOutputFormat.class); 71 | 72 | job.setMapperClass(FlowPartitionMapper.class); 73 | //job.setCombinerClass(WordCountReducer.class); 74 | job.setReducerClass(FlowPartitionReducer.class); 75 | 76 | job.setPartitionerClass(FlowPartition.class); 77 | job.setNumReduceTasks(5); 78 | 79 | job.setOutputKeyClass(Text.class); 80 | job.setOutputValueClass(FlowBean.class); 81 | job.setMapOutputKeyClass(Text.class); 82 | job.setMapOutputValueClass(FlowBean.class); 83 | 84 | FileInputFormat.setInputPaths(job, commaSeparatedPaths); 85 | FileOutputFormat.setOutputPath(job, new Path(outputDir)); 86 | 87 | return job.waitForCompletion(true) ? 0 : 1; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionMapper.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowpartition; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Mapper; 8 | import org.apache.hadoop.util.StringUtils; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | public class FlowPartitionMapper extends Mapper { 13 | 14 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowPartitionMapper.class); 15 | 16 | private String line = null; 17 | private final static char SEPARATOR = '\t'; 18 | 19 | private String phoneNum = null; 20 | private long upFlow = 0; 21 | private long downFlow = 0; 22 | //private long sumFlow = 0; 23 | 24 | private Text text = new Text(); 25 | private FlowBean flowBean = new FlowBean(); 26 | 27 | @Override 28 | protected void map(LongWritable key, Text value, 29 | Mapper.Context context) 30 | throws IOException, InterruptedException { 31 | 32 | //super.map(key, value, context); 33 | line = value.toString(); 34 | String[] fields = StringUtils.split(line, SEPARATOR); 35 | if (fields.length != 11) { 36 | LOGGER.error("invalid line: {}", line); 37 | System.err.println("invalid line: " + line); 38 | } else { 39 | phoneNum = fields[1]; 40 | upFlow = Long.parseLong(fields[8]); 41 | downFlow = Long.parseLong(fields[9]); 42 | flowBean.setPhoneNum(phoneNum); 43 | flowBean.setUpFlow(upFlow); 44 | flowBean.setDownFlow(downFlow); 45 | //sumFlow = upFlow + downFlow; 46 | flowBean.setSumFlow(upFlow + downFlow); 47 | text.set(phoneNum); 48 | context.write(text, flowBean); 49 | } 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowpartition/FlowPartitionReducer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowpartition; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.Text; 6 | import org.apache.hadoop.mapreduce.Reducer; 7 | 8 | public class FlowPartitionReducer extends Reducer{ 9 | 10 | private FlowBean result = new FlowBean(); 11 | 12 | @Override 13 | protected void reduce(Text key, Iterable values, 14 | Reducer.Context context) 15 | throws IOException, InterruptedException { 16 | 17 | //super.reduce(arg0, arg1, arg2); 18 | long upFlow = 0; 19 | long downFlow = 0; 20 | //long flowSum = 0; 21 | for (FlowBean flowBean : values) { 22 | upFlow += flowBean.getUpFlow(); 23 | downFlow += flowBean.getDownFlow(); 24 | //flowSum += flowBean.getSumFlow(); 25 | } 26 | result.setPhoneNum(key.toString()); 27 | result.setUpFlow(upFlow); 28 | result.setDownFlow(downFlow); 29 | //result.setSumFlow(flowSum); 30 | result.setSumFlow(upFlow + downFlow); 31 | context.write(key, result); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowBean.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowsort; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import java.io.IOException; 6 | 7 | import org.apache.hadoop.io.WritableComparable; 8 | 9 | 10 | public class FlowBean implements WritableComparable { 11 | 12 | private String phoneNum; 13 | private long upFlow; 14 | private long downFlow; 15 | private long sumFlow; 16 | 17 | 18 | public String getPhoneNum() { 19 | return phoneNum; 20 | } 21 | public void setPhoneNum(String phoneNum) { 22 | this.phoneNum = phoneNum; 23 | } 24 | public long getUpFlow() { 25 | return upFlow; 26 | } 27 | public void setUpFlow(long upFlow) { 28 | this.upFlow = upFlow; 29 | } 30 | public long getDownFlow() { 31 | return downFlow; 32 | } 33 | public void setDownFlow(long downFlow) { 34 | this.downFlow = downFlow; 35 | } 36 | public long getSumFlow() { 37 | return sumFlow; 38 | } 39 | public void setSumFlow(long sumFlow) { 40 | this.sumFlow = sumFlow; 41 | } 42 | 43 | // @Override 44 | // public String toString() { 45 | // return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow 46 | // + ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]"; 47 | // } 48 | @Override 49 | public String toString() { 50 | return phoneNum + "\t" + upFlow + "\t" + downFlow + "\t" + sumFlow; 51 | } 52 | 53 | public void write(DataOutput out) throws IOException { 54 | out.writeUTF(phoneNum); 55 | out.writeLong(upFlow); 56 | out.writeLong(downFlow); 57 | out.writeLong(sumFlow); 58 | } 59 | public void readFields(DataInput in) throws IOException { 60 | phoneNum = in.readUTF(); 61 | upFlow = in.readLong(); 62 | downFlow = in.readLong(); 63 | sumFlow = in.readLong(); 64 | } 65 | public int compareTo(FlowBean o) { 66 | //return 0; 67 | // DESC 68 | long thisValue = this.sumFlow; 69 | long thatValue = o.getSumFlow(); 70 | return (thisValue < thatValue ? 1 : (thisValue == thatValue ? 0 : -1)); 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortJob.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowsort; 2 | 3 | import java.util.Date; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.conf.Configured; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.NullWritable; 9 | import org.apache.hadoop.mapreduce.Job; 10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 12 | import org.apache.hadoop.util.GenericOptionsParser; 13 | import org.apache.hadoop.util.Tool; 14 | import org.apache.hadoop.util.ToolRunner; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | public class FlowSortJob extends Configured implements Tool { 19 | 20 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowSortJob.class); 21 | 22 | public static void main(String[] args) throws Exception { 23 | 24 | Date startTime = new Date(); 25 | LOGGER.info("==========job started: " + startTime); 26 | int res = ToolRunner.run(new Configuration(), new FlowSortJob(), args); 27 | Date endTime = new Date(); 28 | LOGGER.info("==========job ended: " + endTime); 29 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds"); 30 | System.exit(res); 31 | } 32 | 33 | public int run(String[] args) throws Exception { 34 | 35 | /*Configuration conf = getConf(); 36 | JobClient client = new JobClient(conf); 37 | ClusterStatus cluster = client.getClusterStatus(); 38 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); 39 | String join_reduces = conf.get(REDUCES_PER_HOST); 40 | if (join_reduces != null) { 41 | num_reduces = cluster.getTaskTrackers() * 42 | Integer.parseInt(join_reduces); 43 | } 44 | // Set user-supplied (possibly default) job configs 45 | job.setNumReduceTasks(num_reduces);*/ 46 | 47 | 48 | Configuration conf = new Configuration(); 49 | //conf.set("fs.defaultFS", "hdfs://node-01:9000"); 50 | String[] otherArgs = new GenericOptionsParser(conf, args) 51 | .getRemainingArgs(); 52 | 53 | String commaSeparatedPaths = null; 54 | String outputDir = null; 55 | if (otherArgs.length == 2) { 56 | commaSeparatedPaths = otherArgs[0]; 57 | outputDir = otherArgs[1]; 58 | } else { 59 | System.err.println("Usage: [,...] "); 60 | //System.exit(-1); 61 | return -1; 62 | } 63 | 64 | 65 | Job job = Job.getInstance(conf); 66 | job.setJobName("FlowSortJob"); 67 | job.setJarByClass(FlowSortJob.class); 68 | 69 | job.setMapperClass(FlowSortMapper.class); 70 | //job.setCombinerClass(WordCountReducer.class); 71 | job.setReducerClass(FlowSortReducer.class); 72 | 73 | job.setOutputKeyClass(FlowBean.class); 74 | job.setOutputValueClass(NullWritable.class); 75 | job.setMapOutputKeyClass(FlowBean.class); 76 | job.setMapOutputValueClass(NullWritable.class); 77 | 78 | FileInputFormat.setInputPaths(job, commaSeparatedPaths); 79 | FileOutputFormat.setOutputPath(job, new Path(outputDir)); 80 | 81 | return job.waitForCompletion(true) ? 0 : 1; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortMapper.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowsort; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.NullWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Mapper; 9 | import org.apache.hadoop.util.StringUtils; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | public class FlowSortMapper extends Mapper { 14 | 15 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowSortMapper.class); 16 | 17 | private FlowBean flowBean = new FlowBean(); 18 | 19 | private String line = null; 20 | private final static char SEPARATOR = '\t'; 21 | 22 | private String phoneNum = null; 23 | private long upFlow = 0; 24 | private long downFlow = 0; 25 | private long sumFlow = 0; 26 | 27 | @Override 28 | protected void map(LongWritable key, Text value, 29 | Mapper.Context context) 30 | throws IOException, InterruptedException { 31 | 32 | //super.map(key, value, context); 33 | line = value.toString(); 34 | String[] fields = StringUtils.split(line, SEPARATOR); 35 | if (fields.length != 4) { 36 | LOGGER.error("invalid line: {}", line); 37 | System.err.println("invalid line: " + line); 38 | } else { 39 | phoneNum = fields[0]; 40 | upFlow = Long.parseLong(fields[1]); 41 | downFlow = Long.parseLong(fields[2]); 42 | sumFlow = Long.parseLong(fields[3]); 43 | flowBean.setPhoneNum(phoneNum); 44 | flowBean.setUpFlow(upFlow); 45 | flowBean.setDownFlow(downFlow); 46 | flowBean.setSumFlow(sumFlow); 47 | context.write(flowBean, NullWritable.get()); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowsort/FlowSortReducer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowsort; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.NullWritable; 6 | import org.apache.hadoop.mapreduce.Reducer; 7 | 8 | public class FlowSortReducer extends Reducer{ 9 | 10 | @Override 11 | protected void reduce(FlowBean key, Iterable values, 12 | Reducer.Context context) 13 | throws IOException, InterruptedException { 14 | 15 | //super.reduce(arg0, arg1, arg2); 16 | context.write(key, NullWritable.get()); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowBean.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowsum; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import java.io.IOException; 6 | 7 | import org.apache.hadoop.io.Writable; 8 | 9 | public class FlowBean implements Writable { 10 | 11 | private String phoneNum; 12 | private long upFlow; 13 | private long downFlow; 14 | private long sumFlow; 15 | 16 | 17 | public String getPhoneNum() { 18 | return phoneNum; 19 | } 20 | public void setPhoneNum(String phoneNum) { 21 | this.phoneNum = phoneNum; 22 | } 23 | public long getUpFlow() { 24 | return upFlow; 25 | } 26 | public void setUpFlow(long upFlow) { 27 | this.upFlow = upFlow; 28 | } 29 | public long getDownFlow() { 30 | return downFlow; 31 | } 32 | public void setDownFlow(long downFlow) { 33 | this.downFlow = downFlow; 34 | } 35 | public long getSumFlow() { 36 | return sumFlow; 37 | } 38 | public void setSumFlow(long sumFlow) { 39 | this.sumFlow = sumFlow; 40 | } 41 | 42 | // @Override 43 | // public String toString() { 44 | // return "FlowBean [phoneNum=" + phoneNum + ", upFlow=" + upFlow 45 | // + ", downFlow=" + downFlow + ", sumFlow=" + sumFlow + "]"; 46 | // } 47 | @Override 48 | public String toString() { 49 | return upFlow + "\t" + downFlow + "\t" + sumFlow; 50 | } 51 | 52 | public void write(DataOutput out) throws IOException { 53 | out.writeUTF(phoneNum); 54 | out.writeLong(upFlow); 55 | out.writeLong(downFlow); 56 | out.writeLong(sumFlow); 57 | } 58 | public void readFields(DataInput in) throws IOException { 59 | phoneNum = in.readUTF(); 60 | upFlow = in.readLong(); 61 | downFlow = in.readLong(); 62 | sumFlow = in.readLong(); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumJob.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowsum; 2 | 3 | import java.util.Date; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.conf.Configured; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Job; 10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 12 | import org.apache.hadoop.util.GenericOptionsParser; 13 | import org.apache.hadoop.util.Tool; 14 | import org.apache.hadoop.util.ToolRunner; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | public class FlowSumJob extends Configured implements Tool { 19 | 20 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowSumJob.class); 21 | 22 | public static void main(String[] args) throws Exception { 23 | 24 | Date startTime = new Date(); 25 | LOGGER.info("==========job started: " + startTime); 26 | int res = ToolRunner.run(new Configuration(), new FlowSumJob(), args); 27 | Date endTime = new Date(); 28 | LOGGER.info("==========job ended: " + endTime); 29 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds"); 30 | System.exit(res); 31 | } 32 | 33 | public int run(String[] args) throws Exception { 34 | 35 | /*Configuration conf = getConf(); 36 | JobClient client = new JobClient(conf); 37 | ClusterStatus cluster = client.getClusterStatus(); 38 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); 39 | String join_reduces = conf.get(REDUCES_PER_HOST); 40 | if (join_reduces != null) { 41 | num_reduces = cluster.getTaskTrackers() * 42 | Integer.parseInt(join_reduces); 43 | } 44 | // Set user-supplied (possibly default) job configs 45 | job.setNumReduceTasks(num_reduces);*/ 46 | 47 | 48 | Configuration conf = new Configuration(); 49 | //conf.set("fs.defaultFS", "hdfs://node-01:9000"); 50 | String[] otherArgs = new GenericOptionsParser(conf, args) 51 | .getRemainingArgs(); 52 | 53 | String commaSeparatedPaths = null; 54 | String outputDir = null; 55 | if (otherArgs.length == 2) { 56 | commaSeparatedPaths = otherArgs[0]; 57 | outputDir = otherArgs[1]; 58 | } else { 59 | System.err.println("Usage: [,...] "); 60 | //System.exit(-1); 61 | return -1; 62 | } 63 | 64 | 65 | Job job = Job.getInstance(conf); 66 | job.setJobName("FlowSumJob"); 67 | job.setJarByClass(FlowSumJob.class); 68 | 69 | job.setMapperClass(FlowSumMapper.class); 70 | //job.setCombinerClass(WordCountReducer.class); 71 | job.setReducerClass(FlowSumReducer.class); 72 | 73 | job.setOutputKeyClass(Text.class); 74 | job.setOutputValueClass(FlowBean.class); 75 | job.setMapOutputKeyClass(Text.class); 76 | job.setMapOutputValueClass(FlowBean.class); 77 | 78 | FileInputFormat.setInputPaths(job, commaSeparatedPaths); 79 | FileOutputFormat.setOutputPath(job, new Path(outputDir)); 80 | 81 | return job.waitForCompletion(true) ? 0 : 1; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumMapper.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowsum; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Mapper; 8 | import org.apache.hadoop.util.StringUtils; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | public class FlowSumMapper extends Mapper { 13 | 14 | private static final Logger LOGGER = LoggerFactory.getLogger(FlowSumMapper.class); 15 | 16 | private String line = null; 17 | private final static char SEPARATOR = '\t'; 18 | 19 | private String phoneNum = null; 20 | private long upFlow = 0; 21 | private long downFlow = 0; 22 | //private long sumFlow = 0; 23 | 24 | private Text text = new Text(); 25 | private FlowBean flowBean = new FlowBean(); 26 | 27 | @Override 28 | protected void map(LongWritable key, Text value, 29 | Mapper.Context context) 30 | throws IOException, InterruptedException { 31 | 32 | //super.map(key, value, context); 33 | line = value.toString(); 34 | String[] fields = StringUtils.split(line, SEPARATOR); 35 | if (fields.length != 11) { 36 | LOGGER.error("invalid line: {}", line); 37 | System.err.println("invalid line: " + line); 38 | } else { 39 | phoneNum = fields[1]; 40 | upFlow = Long.parseLong(fields[8]); 41 | downFlow = Long.parseLong(fields[9]); 42 | flowBean.setPhoneNum(phoneNum); 43 | flowBean.setUpFlow(upFlow); 44 | flowBean.setDownFlow(downFlow); 45 | //sumFlow = upFlow + downFlow; 46 | flowBean.setSumFlow(upFlow + downFlow); 47 | text.set(phoneNum); 48 | context.write(text, flowBean); 49 | } 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/flowsum/FlowSumReducer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.flowsum; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.Text; 6 | import org.apache.hadoop.mapreduce.Reducer; 7 | 8 | public class FlowSumReducer extends Reducer{ 9 | 10 | private FlowBean result = new FlowBean(); 11 | 12 | @Override 13 | protected void reduce(Text key, Iterable values, 14 | Reducer.Context context) 15 | throws IOException, InterruptedException { 16 | 17 | //super.reduce(arg0, arg1, arg2); 18 | long upFlow = 0; 19 | long downFlow = 0; 20 | //long flowSum = 0; 21 | for (FlowBean flowBean : values) { 22 | upFlow += flowBean.getUpFlow(); 23 | downFlow += flowBean.getDownFlow(); 24 | //flowSum += flowBean.getSumFlow(); 25 | } 26 | result.setPhoneNum(key.toString()); 27 | result.setUpFlow(upFlow); 28 | result.setDownFlow(downFlow); 29 | //result.setSumFlow(flowSum); 30 | result.setSumFlow(upFlow + downFlow); 31 | context.write(key, result); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneJob.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.invertedindex; 2 | 3 | import java.util.Date; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.conf.Configured; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.LongWritable; 9 | import org.apache.hadoop.io.Text; 10 | import org.apache.hadoop.mapreduce.Job; 11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 13 | import org.apache.hadoop.util.GenericOptionsParser; 14 | import org.apache.hadoop.util.Tool; 15 | import org.apache.hadoop.util.ToolRunner; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | 19 | public class StepOneJob extends Configured implements Tool { 20 | 21 | private static final Logger LOGGER = LoggerFactory.getLogger(StepOneJob.class); 22 | 23 | public static void main(String[] args) throws Exception { 24 | 25 | Date startTime = new Date(); 26 | LOGGER.info("==========job started: " + startTime); 27 | int res = ToolRunner.run(new Configuration(), new StepOneJob(), args); 28 | Date endTime = new Date(); 29 | LOGGER.info("==========job ended: " + endTime); 30 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds"); 31 | System.exit(res); 32 | } 33 | 34 | public int run(String[] args) throws Exception { 35 | 36 | /*Configuration conf = getConf(); 37 | JobClient client = new JobClient(conf); 38 | ClusterStatus cluster = client.getClusterStatus(); 39 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); 40 | String join_reduces = conf.get(REDUCES_PER_HOST); 41 | if (join_reduces != null) { 42 | num_reduces = cluster.getTaskTrackers() * 43 | Integer.parseInt(join_reduces); 44 | } 45 | // Set user-supplied (possibly default) job configs 46 | job.setNumReduceTasks(num_reduces);*/ 47 | 48 | 49 | Configuration conf = new Configuration(); 50 | //conf.set("fs.defaultFS", "hdfs://node-01:9000"); 51 | String[] otherArgs = new GenericOptionsParser(conf, args) 52 | .getRemainingArgs(); 53 | 54 | String commaSeparatedPaths = null; 55 | String outputDir = null; 56 | if (otherArgs.length == 2) { 57 | commaSeparatedPaths = otherArgs[0]; 58 | outputDir = otherArgs[1]; 59 | } else { 60 | System.err.println("Usage: [,...] "); 61 | //System.exit(-1); 62 | return -1; 63 | } 64 | 65 | 66 | Job job = Job.getInstance(conf); 67 | job.setJobName("StepOneJob"); 68 | job.setJarByClass(StepOneJob.class); 69 | 70 | // job.setInputFormatClass(TextInputFormat.class); 71 | // job.setOutputFormatClass(TextOutputFormat.class); 72 | 73 | job.setMapperClass(StepOneMapper.class); 74 | job.setCombinerClass(StepOneReducer.class); 75 | job.setReducerClass(StepOneReducer.class); 76 | 77 | // job.setPartitionerClass(FlowPartition.class); 78 | // job.setNumReduceTasks(5); 79 | 80 | job.setOutputKeyClass(Text.class); 81 | job.setOutputValueClass(LongWritable.class); 82 | job.setMapOutputKeyClass(Text.class); 83 | job.setMapOutputValueClass(LongWritable.class); 84 | 85 | FileInputFormat.setInputPaths(job, commaSeparatedPaths); 86 | FileOutputFormat.setOutputPath(job, new Path(outputDir)); 87 | 88 | return job.waitForCompletion(true) ? 0 : 1; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneMapper.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.invertedindex; 2 | 3 | import java.io.IOException; 4 | import java.util.StringTokenizer; 5 | 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Mapper; 9 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 10 | 11 | public class StepOneMapper extends Mapper{ 12 | 13 | // private static final Logger LOGGER = LoggerFactory.getLogger(StepOneMapper.class); 14 | 15 | private final static char SEPARATOR = '\t'; 16 | 17 | private Text text = new Text(); 18 | private static final LongWritable ONE = new LongWritable(1L); 19 | 20 | @Override 21 | protected void map(LongWritable key, Text value, 22 | Mapper.Context context) 23 | throws IOException, InterruptedException { 24 | 25 | //super.map(key, value, context); 26 | StringTokenizer itr = new StringTokenizer(value.toString()); 27 | while (itr.hasMoreTokens()) { 28 | text.set(itr.nextToken() + SEPARATOR + ((FileSplit)context.getInputSplit()).getPath().getName()); 29 | context.write(text, ONE); 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepOneReducer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.invertedindex; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Reducer; 8 | 9 | public class StepOneReducer extends Reducer{ 10 | 11 | private LongWritable result = new LongWritable(); 12 | 13 | @Override 14 | protected void reduce(Text key, Iterable values, 15 | Reducer.Context context) 16 | throws IOException, InterruptedException { 17 | 18 | //super.reduce(arg0, arg1, arg2); 19 | long count = 0; 20 | for (LongWritable value : values) { 21 | count += value.get(); 22 | } 23 | result.set(count); 24 | context.write(key, result); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoJob.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.invertedindex; 2 | 3 | import java.util.Date; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.conf.Configured; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Job; 10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 12 | import org.apache.hadoop.util.GenericOptionsParser; 13 | import org.apache.hadoop.util.Tool; 14 | import org.apache.hadoop.util.ToolRunner; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | public class StepTwoJob extends Configured implements Tool { 19 | 20 | private static final Logger LOGGER = LoggerFactory.getLogger(StepTwoJob.class); 21 | 22 | public static void main(String[] args) throws Exception { 23 | 24 | Date startTime = new Date(); 25 | LOGGER.info("==========job started: " + startTime); 26 | int res = ToolRunner.run(new Configuration(), new StepTwoJob(), args); 27 | Date endTime = new Date(); 28 | LOGGER.info("==========job ended: " + endTime); 29 | LOGGER.info("==========job took: " + (endTime.getTime() - startTime.getTime())/1000 + " seconds"); 30 | System.exit(res); 31 | } 32 | 33 | public int run(String[] args) throws Exception { 34 | 35 | /*Configuration conf = getConf(); 36 | JobClient client = new JobClient(conf); 37 | ClusterStatus cluster = client.getClusterStatus(); 38 | int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); 39 | String join_reduces = conf.get(REDUCES_PER_HOST); 40 | if (join_reduces != null) { 41 | num_reduces = cluster.getTaskTrackers() * 42 | Integer.parseInt(join_reduces); 43 | } 44 | // Set user-supplied (possibly default) job configs 45 | job.setNumReduceTasks(num_reduces);*/ 46 | 47 | 48 | Configuration conf = new Configuration(); 49 | //conf.set("fs.defaultFS", "hdfs://node-01:9000"); 50 | String[] otherArgs = new GenericOptionsParser(conf, args) 51 | .getRemainingArgs(); 52 | 53 | String commaSeparatedPaths = null; 54 | String outputDir = null; 55 | if (otherArgs.length == 2) { 56 | commaSeparatedPaths = otherArgs[0]; 57 | outputDir = otherArgs[1]; 58 | } else { 59 | System.err.println("Usage: [,...] "); 60 | //System.exit(-1); 61 | return -1; 62 | } 63 | 64 | 65 | Job job = Job.getInstance(conf); 66 | job.setJobName("StepTwoJob"); 67 | job.setJarByClass(StepTwoJob.class); 68 | 69 | // job.setInputFormatClass(TextInputFormat.class); 70 | // job.setOutputFormatClass(TextOutputFormat.class); 71 | 72 | job.setMapperClass(StepTwoMapper.class); 73 | // job.setCombinerClass(StepOneReducer.class); 74 | job.setReducerClass(StepTwoReducer.class); 75 | 76 | // job.setPartitionerClass(FlowPartition.class); 77 | // job.setNumReduceTasks(5); 78 | 79 | job.setOutputKeyClass(Text.class); 80 | job.setOutputValueClass(Text.class); 81 | job.setMapOutputKeyClass(Text.class); 82 | job.setMapOutputValueClass(Text.class); 83 | 84 | FileInputFormat.setInputPaths(job, commaSeparatedPaths); 85 | FileOutputFormat.setOutputPath(job, new Path(outputDir)); 86 | 87 | return job.waitForCompletion(true) ? 0 : 1; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoMapper.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.invertedindex; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Mapper; 8 | import org.apache.hadoop.util.StringUtils; 9 | 10 | public class StepTwoMapper extends Mapper { 11 | 12 | private Text textKey = new Text(); 13 | private Text textValue = new Text(); 14 | 15 | private final static char SEPARATOR = '\t'; 16 | private String line = null; 17 | private String word = null; 18 | private String fileName = null; 19 | private String count = null; 20 | 21 | @Override 22 | protected void map(LongWritable key, Text value, 23 | Mapper.Context context) 24 | throws IOException, InterruptedException { 25 | 26 | //super.map(key, value, context); 27 | line = value.toString(); 28 | String[] splits = StringUtils.split(line, SEPARATOR); 29 | word = splits[0]; 30 | fileName = splits[1]; 31 | count = splits[2]; 32 | textKey.set(word); 33 | textValue.set(fileName + SEPARATOR + count); 34 | context.write(textKey, textValue); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/invertedindex/StepTwoReducer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.invertedindex; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.Text; 6 | import org.apache.hadoop.mapreduce.Reducer; 7 | import org.apache.hadoop.util.StringUtils; 8 | 9 | public class StepTwoReducer extends Reducer{ 10 | 11 | private Text result = new Text(); 12 | 13 | @Override 14 | protected void reduce(Text key, Iterable values, 15 | Reducer.Context context) throws IOException, 16 | InterruptedException { 17 | 18 | //super.reduce(arg0, arg1, arg2); 19 | result.set(StringUtils.join(";", values)); 20 | context.write(key, result); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountJob.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.wordcount; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.io.LongWritable; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Job; 10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 12 | import org.apache.hadoop.util.GenericOptionsParser; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | /* 17 | * conf: copy hadoop conf to src/main/resources dir or exe jar on hadoop node 18 | * export: wordcount.jar 19 | * example: hadoop jar wordcount.jar com.xcompany.xproject.mr.wordcount.WordCountJob /word-count/input /word-count/output 20 | */ 21 | public class WordCountJob { 22 | 23 | private static final Logger LOGGER = LoggerFactory 24 | .getLogger(WordCountJob.class); 25 | 26 | public static void main(String[] args) throws IOException, 27 | ClassNotFoundException, InterruptedException { 28 | 29 | Configuration conf = new Configuration(); 30 | //conf.set("fs.defaultFS", "hdfs://node-01:9000"); 31 | String[] otherArgs = new GenericOptionsParser(conf, args) 32 | .getRemainingArgs(); 33 | 34 | String commaSeparatedPaths = null; 35 | String outputDir = null; 36 | if (otherArgs.length == 2) { 37 | commaSeparatedPaths = otherArgs[0]; 38 | outputDir = otherArgs[1]; 39 | } else { 40 | System.err.println("Usage: [,...] "); 41 | System.exit(-1); 42 | } 43 | 44 | LOGGER.info("==========job start"); 45 | Job job = Job.getInstance(conf); 46 | job.setJobName("WordCountJob"); 47 | job.setJarByClass(WordCountJob.class); 48 | 49 | job.setMapperClass(WordCountMapper.class); 50 | job.setCombinerClass(WordCountReducer.class); 51 | job.setReducerClass(WordCountReducer.class); 52 | 53 | job.setOutputKeyClass(Text.class); 54 | job.setOutputValueClass(LongWritable.class); 55 | job.setMapOutputKeyClass(Text.class); 56 | job.setMapOutputValueClass(LongWritable.class); 57 | 58 | FileInputFormat.setInputPaths(job, commaSeparatedPaths); 59 | FileOutputFormat.setOutputPath(job, new Path(outputDir)); 60 | 61 | if (job.waitForCompletion(true)) { 62 | LOGGER.info("==========job success"); 63 | } else { 64 | LOGGER.info("==========job failed"); 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountMapper.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.wordcount; 2 | 3 | import java.io.IOException; 4 | import java.util.StringTokenizer; 5 | 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Mapper; 9 | 10 | /* 11 | * http://blog.csdn.net/boonya/article/details/54959393 12 | * http://blog.csdn.net/guoery/article/details/8529004 13 | * LongWritable: LineNumber 14 | * Text : LineString 15 | * Text : OutKey 16 | * LongWritable: OutValue 17 | */ 18 | public class WordCountMapper extends Mapper{ 19 | 20 | private final static LongWritable ONE = new LongWritable(1L); 21 | private Text word = new Text(); 22 | 23 | @Override 24 | protected void map(LongWritable key, Text value, 25 | Mapper.Context context) 26 | throws IOException, InterruptedException { 27 | 28 | //super.map(key, value, context); 29 | StringTokenizer itr = new StringTokenizer(value.toString()); 30 | while (itr.hasMoreTokens()) { 31 | word.set(itr.nextToken()); 32 | context.write(word, ONE); 33 | 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /mr/src/main/java/com/xcompany/xproject/mr/wordcount/WordCountReducer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr.wordcount; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Reducer; 8 | 9 | public class WordCountReducer extends Reducer{ 10 | 11 | private LongWritable result = new LongWritable(); 12 | 13 | @Override 14 | protected void reduce(Text key, Iterable values, 15 | Reducer.Context context) 16 | throws IOException, InterruptedException { 17 | 18 | //super.reduce(arg0, arg1, arg2); 19 | long count = 0; 20 | for (LongWritable value : values) { 21 | count += value.get(); 22 | } 23 | result.set(count); 24 | context.write(key, result); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /mr/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ### direct log messages to stdout ### 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target = System.out 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 6 | 7 | ### direct messages to file test.log ### 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender 9 | log4j.appender.file.File= ./log/hdfs.log 10 | log4j.appender.file.Append = true 11 | log4j.appender.file.MaxFileSize = 1MB 12 | log4j.appender.file.MaxBackupIndex = 10 13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout 14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 15 | 16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.dfile.File = ./logs/hdfs.log 18 | log4j.appender.dfile.Append = true 19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout 20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 21 | 22 | ### set log levels - for more verbose logging change 'info' to 'debug' ### 23 | 24 | #log4j.logger.org.app=debug 25 | #log4j.logger.com.ares=debug, stdout, file, dfile 26 | #log4j.logger.com.xcloud=debug, stdout 27 | #log4j.additivity.com.ares=false 28 | 29 | # log4j.rootLogger=info, stdout 30 | log4j.rootLogger=info, stdout, file, dfile 31 | -------------------------------------------------------------------------------- /mr/src/test/java/com/xcompany/xproject/mr/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.mr; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | com.xcompany.xproject 6 | hadoop 7 | pom 8 | 1.0.0-RELEASE 9 | 10 | 11 | 1.8 12 | 2.7.4 13 | 14 | 16 | 17 | 18 | 19 | 20 | 21 | junit 22 | junit 23 | 4.12 24 | test 25 | 26 | 36 | 37 | 38 | 39 | 73 | 74 | 75 | hdfs 76 | rpc 77 | mr 78 | hive 79 | hbase 80 | storm 81 | kafka 82 | storm-kafka 83 | scala 84 | spark 85 | sparkstreaming 86 | spark-streaming 87 | 88 | 89 | -------------------------------------------------------------------------------- /rpc/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /rpc/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /rpc/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | rpc 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.wst.jsdt.core.javascriptValidator 10 | 11 | 12 | 13 | 14 | org.eclipse.jdt.core.javabuilder 15 | 16 | 17 | 18 | 19 | org.eclipse.wst.common.project.facet.core.builder 20 | 21 | 22 | 23 | 24 | org.eclipse.wst.validation.validationbuilder 25 | 26 | 27 | 28 | 29 | org.eclipse.m2e.core.maven2Builder 30 | 31 | 32 | 33 | 34 | 35 | org.eclipse.jem.workbench.JavaEMFNature 36 | org.eclipse.wst.common.modulecore.ModuleCoreNature 37 | org.eclipse.jdt.core.javanature 38 | org.eclipse.m2e.core.maven2Nature 39 | org.eclipse.wst.common.project.facet.core.nature 40 | org.eclipse.wst.jsdt.core.jsNature 41 | 42 | 43 | -------------------------------------------------------------------------------- /rpc/.settings/.jsdtscope: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /rpc/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 4 | org.eclipse.jdt.core.compiler.compliance=1.5 5 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 6 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 7 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 8 | org.eclipse.jdt.core.compiler.source=1.5 9 | -------------------------------------------------------------------------------- /rpc/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /rpc/.settings/org.eclipse.wst.common.component: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /rpc/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /rpc/.settings/org.eclipse.wst.common.project.facet.core.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /rpc/.settings/org.eclipse.wst.jsdt.ui.superType.container: -------------------------------------------------------------------------------- 1 | org.eclipse.wst.jsdt.launching.baseBrowserLibrary -------------------------------------------------------------------------------- /rpc/.settings/org.eclipse.wst.jsdt.ui.superType.name: -------------------------------------------------------------------------------- 1 | Window -------------------------------------------------------------------------------- /rpc/.settings/org.eclipse.wst.validation.prefs: -------------------------------------------------------------------------------- 1 | disabled=06target 2 | eclipse.preferences.version=1 3 | -------------------------------------------------------------------------------- /rpc/dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | hadoop 5 | com.xcompany.xproject 6 | 1.0.0-RELEASE 7 | 8 | 4.0.0 9 | rpc 10 | 11 | 12 | junit 13 | junit 14 | 4.12 15 | test 16 | 17 | 18 | hamcrest-core 19 | org.hamcrest 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /rpc/logs/hdfs.log: -------------------------------------------------------------------------------- 1 | [INFO ] 2017-12-08 10:31:32.239 [] [] [main] org.apache.hadoop.ipc.CallQueueManager.(CallQueueManager.java:57) Using callQueue: class java.util.concurrent.LinkedBlockingQueue queueCapacity: 100 2 | [INFO ] 2017-12-08 10:31:32.875 [] [] [Socket Reader #1 for port 8888] org.apache.hadoop.ipc.Server$Listener$Reader.run(Server.java:722) Starting Socket Reader #1 for port 8888 3 | [WARN ] 2017-12-08 10:31:33.401 [] [] [main] org.apache.hadoop.util.NativeCodeLoader.(NativeCodeLoader.java:62) Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 4 | [INFO ] 2017-12-08 10:31:33.470 [] [] [main] com.xcompany.xproject.rpc.HelloServer.main(HelloServer.java:32) Server start to listen on 8888 5 | [INFO ] 2017-12-08 10:31:33.486 [] [] [IPC Server listener on 8888] org.apache.hadoop.ipc.Server$Listener.run(Server.java:801) IPC Server listener on 8888: starting 6 | [INFO ] 2017-12-08 10:31:33.487 [] [] [IPC Server Responder] org.apache.hadoop.ipc.Server$Responder.run(Server.java:962) IPC Server Responder: starting 7 | [WARN ] 2017-12-08 10:31:50.842 [] [] [main] org.apache.hadoop.util.NativeCodeLoader.(NativeCodeLoader.java:62) Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 8 | [INFO ] 2017-12-08 10:31:51.734 [] [] [IPC Server handler 0 on 8888] com.xcompany.xproject.rpc.HelloServer.helloMethod(HelloServer.java:18) JunneYang 9 | [INFO ] 2017-12-08 10:31:51.761 [] [] [main] com.xcompany.xproject.rpc.HelloClient.testHello(HelloClient.java:34) Hello JunneYang 10 | [INFO ] 2017-12-08 10:31:51.763 [] [] [IPC Server handler 0 on 8888] com.xcompany.xproject.rpc.HelloServer.helloMethod(HelloServer.java:18) Ares 11 | [INFO ] 2017-12-08 10:31:51.766 [] [] [main] com.xcompany.xproject.rpc.HelloClient.testHello(HelloClient.java:36) Hello Ares 12 | -------------------------------------------------------------------------------- /rpc/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | com.xcompany.xproject 9 | hadoop 10 | 1.0.0-RELEASE 11 | 12 | 13 | rpc 14 | jar 15 | 16 | 17 | 18 | org.apache.hadoop 19 | hadoop-client 20 | 2.7.4 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /rpc/src/main/java/com/xcompany/xproject/rpc/App.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.rpc; 2 | 3 | /** 4 | * Hello world! 5 | * 6 | */ 7 | public class App 8 | { 9 | public static void main( String[] args ) 10 | { 11 | System.out.println( "Hello World!" ); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /rpc/src/main/java/com/xcompany/xproject/rpc/HelloClient.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.rpc; 2 | 3 | import java.io.IOException; 4 | import java.net.InetSocketAddress; 5 | 6 | import org.apache.hadoop.conf.Configuration; 7 | import org.apache.hadoop.ipc.RPC; 8 | import org.junit.After; 9 | import org.junit.Before; 10 | import org.junit.Test; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | public class HelloClient { 15 | 16 | private static final Logger LOGGER = LoggerFactory.getLogger(HelloServer.class); 17 | 18 | @Before 19 | public void setUp() { 20 | } 21 | @After 22 | public void tearDown() { 23 | } 24 | 25 | @Test 26 | public void testHello() throws IOException { 27 | String bindAddress = "node-01"; 28 | int port = 8888; 29 | InetSocketAddress addr = new InetSocketAddress(bindAddress, port); 30 | HelloProtocol proxy = RPC.getProxy( 31 | HelloProtocol.class, HelloProtocol.versionID, 32 | addr, new Configuration()); 33 | String resp = proxy.helloMethod("JunneYang"); 34 | LOGGER.info(resp); 35 | resp = proxy.helloMethod("Ares"); 36 | LOGGER.info(resp); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /rpc/src/main/java/com/xcompany/xproject/rpc/HelloProtocol.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.rpc; 2 | 3 | public interface HelloProtocol { 4 | 5 | public static final long versionID = 1L; 6 | public String helloMethod(String name); 7 | 8 | } 9 | -------------------------------------------------------------------------------- /rpc/src/main/java/com/xcompany/xproject/rpc/HelloServer.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.rpc; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.HadoopIllegalArgumentException; 6 | import org.apache.hadoop.conf.Configuration; 7 | import org.apache.hadoop.ipc.RPC; 8 | import org.apache.hadoop.ipc.RPC.Builder; 9 | import org.apache.hadoop.ipc.RPC.Server; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | public class HelloServer implements HelloProtocol { 14 | 15 | private static final Logger LOGGER = LoggerFactory.getLogger(HelloServer.class); 16 | 17 | public String helloMethod(String name) { 18 | LOGGER.info(name); 19 | return "Hello " + name; 20 | } 21 | 22 | public static void main(String[] args) throws HadoopIllegalArgumentException, IOException { 23 | Configuration conf = new Configuration(); 24 | Builder builder = new RPC.Builder(conf); 25 | String bindAddress = "node-01"; 26 | int port = 8888; 27 | builder.setBindAddress(bindAddress) 28 | .setPort(8888) 29 | .setProtocol(HelloProtocol.class) 30 | .setInstance(new HelloServer()); 31 | Server server = builder.build(); 32 | LOGGER.info("Server start to listen on " + port); 33 | server.start(); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /rpc/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ### direct log messages to stdout ### 2 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target = System.out 4 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 6 | 7 | ### direct messages to file test.log ### 8 | log4j.appender.file = org.apache.log4j.RollingFileAppender 9 | log4j.appender.file.File= ./log/hdfs.log 10 | log4j.appender.file.Append = true 11 | log4j.appender.file.MaxFileSize = 1MB 12 | log4j.appender.file.MaxBackupIndex = 10 13 | log4j.appender.file.layout = org.apache.log4j.PatternLayout 14 | log4j.appender.file.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 15 | 16 | log4j.appender.dfile = org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.dfile.File = ./logs/hdfs.log 18 | log4j.appender.dfile.Append = true 19 | log4j.appender.dfile.layout = org.apache.log4j.PatternLayout 20 | log4j.appender.dfile.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{remoteAddr}] [%X{requestID}] [%t] %l %m%n 21 | 22 | ### set log levels - for more verbose logging change 'info' to 'debug' ### 23 | 24 | #log4j.logger.org.app=debug 25 | #log4j.logger.com.ares=debug, stdout, file, dfile 26 | #log4j.logger.com.xcloud=debug, stdout 27 | #log4j.additivity.com.ares=false 28 | 29 | # log4j.rootLogger=info, stdout 30 | log4j.rootLogger=info, stdout, file, dfile 31 | -------------------------------------------------------------------------------- /rpc/src/test/java/com/xcompany/xproject/rpc/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.rpc; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /scala/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /scala/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /scala/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | scala 4 | 5 | 6 | 7 | 8 | 9 | org.scala-ide.sdt.core.scalabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.m2e.core.maven2Nature 21 | org.scala-ide.sdt.core.scalanature 22 | org.eclipse.jdt.core.javanature 23 | 24 | 25 | -------------------------------------------------------------------------------- /scala/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | org.eclipse.jdt.core.compiler.compliance=1.5 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 13 | org.eclipse.jdt.core.compiler.source=1.5 14 | -------------------------------------------------------------------------------- /scala/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /scala/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | com.xcompany.xproject 9 | hadoop 10 | 1.0.0-RELEASE 11 | 12 | 13 | scala 14 | 15 | 16 | 17 | 2.11.11 18 | 19 | 20 | 21 | 22 | 27 | 28 | junit 29 | junit 30 | 4.13.1 31 | test 32 | 33 | 34 | org.specs 35 | specs 36 | 1.2.5 37 | test 38 | 39 | 40 | 41 | 42 | 47 | 62 | 67 | 72 | 73 | 74 | 75 | 76 | 77 | scala-tools.org 78 | Scala-Tools Maven2 Repository 79 | http://scala-tools.org/repo-releases 80 | 81 | 82 | 83 | 84 | scala-tools.org 85 | Scala-Tools Maven2 Repository 86 | http://scala-tools.org/repo-releases 87 | 88 | 89 | 90 | src/main/scala 91 | src/test/scala 92 | 93 | 94 | org.scala-tools 95 | maven-scala-plugin 96 | 98 | 99 | ${scala.version} 100 | 101 | -target:jvm-1.8 102 | 103 | 104 | 105 | 106 | maven-eclipse-plugin 107 | 108 | true 109 | 110 | ch.epfl.lamp.sdt.core.scalabuilder 111 | 112 | 113 | ch.epfl.lamp.sdt.core.scalanature 114 | 115 | 116 | org.eclipse.jdt.launching.JRE_CONTAINER 117 | ch.epfl.lamp.sdt.launching.SCALA_CONTAINER 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | org.scala-tools 127 | maven-scala-plugin 128 | 129 | ${scala.version} 130 | 131 | 132 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /scala/src/main/scala/com/xcompany/xproject/scala/App.scala: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.scala 2 | 3 | /** 4 | * http://blog.csdn.net/wuyinxian/article/details/38727717 5 | * http://download.scala-ide.org/sdk/helium/e38/scala211/stable/site 6 | */ 7 | 8 | 9 | object App { 10 | def main(args: Array[String]): Unit = { 11 | println("Hello World!"); 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /spark-streaming.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark-streaming.zip -------------------------------------------------------------------------------- /spark/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /spark/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /spark/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | spark 4 | 5 | 6 | 7 | 8 | 9 | org.scala-ide.sdt.core.scalabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.scala-ide.sdt.core.scalanature 21 | org.eclipse.jdt.core.javanature 22 | org.eclipse.m2e.core.maven2Nature 23 | 24 | 25 | -------------------------------------------------------------------------------- /spark/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 3 | org.eclipse.jdt.core.compiler.compliance=1.5 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.5 6 | -------------------------------------------------------------------------------- /spark/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427870000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427870000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427880000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427880000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427890000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427890000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427900000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427900000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427910000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427910000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427920000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427920000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427930000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427930000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427940000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427940000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427950000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427950000.crc -------------------------------------------------------------------------------- /spark/checkpoint/.checkpoint-1514427960000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/.checkpoint-1514427960000.crc -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427870000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427870000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427880000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427880000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427890000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427890000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427900000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427900000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427910000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427910000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427920000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427920000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427930000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427930000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427940000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427940000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427950000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427950000 -------------------------------------------------------------------------------- /spark/checkpoint/checkpoint-1514427960000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/checkpoint-1514427960000 -------------------------------------------------------------------------------- /spark/checkpoint/receivedBlockMetadata/log-1514427870017-1514427930017: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/receivedBlockMetadata/log-1514427870017-1514427930017 -------------------------------------------------------------------------------- /spark/checkpoint/receivedBlockMetadata/log-1514427932107-1514427992107: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/spark/checkpoint/receivedBlockMetadata/log-1514427932107-1514427992107 -------------------------------------------------------------------------------- /spark/src/main/scala/com/xcompany/xproject/spark/App.scala: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.spark 2 | 3 | /** 4 | * Hello world! 5 | * 6 | */ 7 | object App { 8 | def main(args: Array[String]): Unit = { 9 | println("Hello World!"); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /spark/src/main/scala/com/xcompany/xproject/spark/WordCount.scala: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.spark 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.SparkContext 5 | import org.apache.spark.rdd.RDD.rddToPairRDDFunctions 6 | 7 | object WordCount { 8 | def main(args: Array[String]): Unit = { 9 | // println(args(0)) 10 | // println("Hello World!") 11 | val conf = new SparkConf().setAppName("WordCount") 12 | // .setMaster("spark://node-01:7077") 13 | // .setMaster("local") 14 | // .set("spark.executor.memory", "1g") 15 | // .set("spark.cores.max", "1") 16 | val sc = new SparkContext(conf) 17 | 18 | val lines = sc.textFile("file:///home/xxproject/workspace/xxhadoop/spark_data/") 19 | val words = lines.flatMap(line => line.split(" ")) 20 | val wordCounts = words.map(word => (word, 1)).reduceByKey((a, b) => a + b) 21 | wordCounts.collect().foreach(println) 22 | wordCounts.partitions.length 23 | // wordCounts.saveAsTextFile("file:///tmp/output") 24 | 25 | sc.stop() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /spark/src/main/scala/com/xcompany/xproject/spark/streaming/BroadcastWrapper.scala: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.spark.streaming 2 | 3 | import scala.collection.mutable 4 | 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.broadcast.Broadcast 7 | import java.io.ObjectInputStream 8 | import java.io.ObjectOutputStream 9 | import java.util.Calendar 10 | import java.text.SimpleDateFormat 11 | 12 | //http://spark.apache.org/docs/2.2.0/streaming-programming-guide.html#accumulators-broadcast-variables-and-checkpoints 13 | object BroadcastWrapper { 14 | @volatile private var instance: Broadcast[Map[String, String]] = null 15 | private val map = mutable.LinkedHashMap[String, String]() 16 | 17 | def getUpdateInfo(): Map[String, String] = { 18 | val jedis_driver = RedisClient.pool.getResource 19 | println("=====GET_DRIVER") 20 | val is_update = jedis_driver.lpop("is_update") 21 | println("is_update: " + is_update) 22 | println("=====READ_DRIVER") 23 | 24 | // if (null == is_update) { 25 | // rdd.sparkContext.broadcast(is_update) 26 | // } 27 | 28 | jedis_driver.close() 29 | println("=====CLOSE_DRIVER") 30 | 31 | map += ("is_update" -> is_update) 32 | val broadcast_info = jedis_driver.get("broadcast_info") 33 | map += ("broadcast_info" -> broadcast_info) 34 | map.toMap 35 | } 36 | 37 | def getInstance(sc: SparkContext): Broadcast[Map[String, String]] = { 38 | if (instance == null) { 39 | synchronized { 40 | if (instance == null) { 41 | val updateInfo = getUpdateInfo() 42 | // https://www.jianshu.com/p/95896d06a94d 43 | if (Some(null) != updateInfo.get("is_update")) { 44 | instance = sc.broadcast(updateInfo) 45 | } 46 | } 47 | } 48 | } 49 | instance 50 | } 51 | 52 | def broadcastInfo(sc: SparkContext, blocking: Boolean = false): Unit = { 53 | val updateInfo = getUpdateInfo() 54 | // https://www.jianshu.com/p/95896d06a94d 55 | if (Some(null) != updateInfo.get("is_update")) { 56 | if (instance != null) { 57 | instance.unpersist(blocking) 58 | } 59 | instance = sc.broadcast(updateInfo) 60 | } 61 | 62 | // val calendar = Calendar.getInstance() 63 | // val date = calendar.getTime() 64 | // val format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss") 65 | // val dateFormat = format.format(date) 66 | // println("=====broadcat success: " + dateFormat) 67 | } 68 | 69 | // private def writeObject(out: ObjectOutputStream): Unit = { 70 | // out.writeObject(instance) 71 | // } 72 | // 73 | // private def readObject(in: ObjectInputStream): Unit = { 74 | // instance = in.readObject().asInstanceOf[Broadcast[Map[String, String]]] 75 | // } 76 | 77 | } 78 | 79 | -------------------------------------------------------------------------------- /spark/src/main/scala/com/xcompany/xproject/spark/streaming/NetworkWordCount.scala: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.spark.streaming 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.streaming.Seconds 5 | import org.apache.spark.streaming.StreamingContext 6 | import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions 7 | 8 | object NetworkWordCount { 9 | def main(args: Array[String]): Unit = { 10 | val conf = new SparkConf().setAppName("NetworkWordCount") 11 | // .setMaster("spark://node-01:7077") 12 | // .setMaster("local") 13 | // .set("spark.executor.memory", "1g") 14 | // .set("spark.cores.max", "1") 15 | val ssc = new StreamingContext(conf, Seconds(10)) 16 | val lines = ssc.socketTextStream("node-01", 9999) 17 | val words = lines.flatMap(_.split(" ")) 18 | val pairs = words.map(word => (word, 1)) 19 | val wordCounts = pairs.reduceByKey(_ + _) 20 | wordCounts.print() 21 | 22 | 23 | wordCounts.foreachRDD { rdd => 24 | rdd.foreachPartition { partitionOfRecords => { 25 | // val connection = ConnectionPool.getConnection() 26 | partitionOfRecords.foreach(record => { 27 | // val sql = "insert into streaming_itemcount(item,count) values('" + record._1 + "'," + record._2 + ")" 28 | // val stmt = connection.createStatement 29 | // stmt.executeUpdate(sql) 30 | println(record) 31 | }) 32 | // ConnectionPool.returnConnection(connection) 33 | }} 34 | } 35 | 36 | ssc.start() // Start the computation 37 | ssc.awaitTermination() // Wait for the computation to terminate 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /spark/src/main/scala/com/xcompany/xproject/spark/streaming/RedisClient.scala: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.spark.streaming 2 | 3 | import org.apache.commons.pool2.impl.GenericObjectPoolConfig 4 | import redis.clients.jedis.JedisPool 5 | import redis.clients.jedis.JedisPoolConfig 6 | 7 | //http://blog.csdn.net/qq_26525215/article/details/60466222 8 | //https://segmentfault.com/a/1190000005085077 9 | object RedisClient { 10 | val host = "node-04" 11 | val port = 63791 12 | val timeout = 50000 13 | val password = "123456" 14 | val database = 0 15 | val maxTotal = 10 16 | val maxIdle = 5 17 | val maxWaitMillis = timeout 18 | val testOnBorrow = true 19 | 20 | val config = new JedisPoolConfig 21 | config.setMaxTotal(maxTotal) 22 | config.setMaxIdle(maxIdle) 23 | config.setMaxWaitMillis(maxWaitMillis) 24 | config.setTestOnBorrow(testOnBorrow) 25 | 26 | // must lazy 27 | lazy val pool = new JedisPool(config, host, port, timeout, password, database) 28 | 29 | lazy val hook = new Thread { 30 | override def run = { 31 | println("Execute hook thread: " + this) 32 | pool.destroy() 33 | } 34 | } 35 | sys.addShutdownHook(hook.run) 36 | } 37 | -------------------------------------------------------------------------------- /spark/src/main/scala/com/xcompany/xproject/spark/streaming/WaitForReady.scala: -------------------------------------------------------------------------------- 1 | package com.xcompany.xproject.spark.streaming 2 | 3 | object WaitForReady { 4 | private val PREFIX = "streaming" 5 | private val IS_READY = PREFIX + ":is_ready" 6 | 7 | def waitForReady(): Unit = { 8 | val jedis_main = RedisClient.pool.getResource 9 | var is_ready = jedis_main.get(IS_READY) 10 | print(is_ready.toBoolean) 11 | while (null == is_ready) { 12 | println("is_ready: " + is_ready + ", continue waitFor...") 13 | Thread.sleep(5000) 14 | is_ready = jedis_main.get("is_ready") 15 | } 16 | jedis_main.close() 17 | println("is_ready: " + is_ready + ", start to submitJob...") 18 | } 19 | 20 | def main(args: Array[String]): Unit = { 21 | waitForReady() 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /spark/src/test/scala/com/xcompany/xproject/spark/AppTest.scala: -------------------------------------------------------------------------------- 1 | //package com.xcompany.xproject.spark 2 | // 3 | //import org.junit._ 4 | //import Assert._ 5 | // 6 | //@Test 7 | //class AppTest { 8 | // 9 | // @Test 10 | // def testOK() = assertTrue(true) 11 | // 12 | //// @Test 13 | //// def testKO() = assertTrue(false) 14 | // 15 | //} 16 | // 17 | // 18 | -------------------------------------------------------------------------------- /spark/src/test/scala/com/xcompany/xproject/spark/MySpec.scala: -------------------------------------------------------------------------------- 1 | //package com.xcompany.xproject.spark 2 | // 3 | //import org.specs._ 4 | //import org.specs.runner.{ConsoleRunner, JUnit4} 5 | // 6 | //class MySpecTest extends JUnit4(MySpec) 7 | ////class MySpecSuite extends ScalaTestSuite(MySpec) 8 | //object MySpecRunner extends ConsoleRunner(MySpec) 9 | // 10 | //object MySpec extends Specification { 11 | // "This wonderful system" should { 12 | // "save the world" in { 13 | // val list = Nil 14 | // list must beEmpty 15 | // } 16 | // } 17 | //} 18 | -------------------------------------------------------------------------------- /spark_data/a.txt: -------------------------------------------------------------------------------- 1 | hello tom 2 | hello jerry 3 | hello tom 4 | -------------------------------------------------------------------------------- /spark_data/b.txt: -------------------------------------------------------------------------------- 1 | hello jerry 2 | hello jerry 3 | tom jerry 4 | -------------------------------------------------------------------------------- /spark_data/c.txt: -------------------------------------------------------------------------------- 1 | hello jerry 2 | hello tom 3 | -------------------------------------------------------------------------------- /sparkstreaming.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/sparkstreaming.zip -------------------------------------------------------------------------------- /storm-kafka.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/storm-kafka.zip -------------------------------------------------------------------------------- /storm.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/storm.zip -------------------------------------------------------------------------------- /tensorflow/01-TemsorFlow的模块与API.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/01-TemsorFlow的模块与API.png -------------------------------------------------------------------------------- /tensorflow/02-TensorFlow架构.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/02-TensorFlow架构.png -------------------------------------------------------------------------------- /tensorflow/02-TensorFlow核心基础知识.txt: -------------------------------------------------------------------------------- 1 | 一、Tensor张量使用(可以使用constant、variable、placeholder进行赋值) 2 | import tensorflow as tf 3 | tf.constant("Hello, TensorFlow", dtype=tf.string) 4 | tf.constant([1, 2, 3, 4, 5], dtype=tf.int32) 5 | tf.Variable([[1, 2], [3, 4]], dtype=tf.int32) 6 | tf.zeros(shape=(2, 3, 4), dtype=tf.int32) 7 | tf.constant(0, shape=(2, 3, 4), dtype=tf.int32) 8 | a = tf.constant(0, shape=(2, 3, 4), dtype=tf.int32) 9 | tf.rank(a) 10 | 11 | 二、Variable变量使用(2.X版本之前默认是懒加载,2.X之后是即时加载; 12 | 2.X版本之前使用Saver、之后使用Checkpoint进行变量的存储与恢复操作) 13 | import tensorflow as tf 14 | W = tf.Variable(tf.random.normal(shape=[1, 10], mean=0, stddev=1)) 15 | b = tf.Variable(tf.zeros([10])) 16 | print([W, b]) 17 | b.assign(b + tf.constant(1.0, shape=[10])) 18 | checkpoint = tf.train.Checkpoint(W=W, b=b) 19 | checkpoint.save('./demo/demo-model') 20 | b.assign(b + tf.constant(1.0, shape=[10])) 21 | print(W, b) 22 | checkpoint.restore('./demo/demo-model-1') 23 | print(W, b) 24 | 25 | 26 | import tensorflow as tf 27 | tf.compat.v1.disable_eager_execution() 28 | W = tf.Variable(tf.random.normal(shape=[1, 10], mean=0, stddev=1)) 29 | b = tf.Variable(tf.zeros([10])) 30 | print([W, b]) 31 | sess = tf.compat.v1.Session() 32 | sess.run(tf.compat.v1.global_variables_initializer()) 33 | sess.run([W, b]) 34 | sess.run(tf.compat.v1.assign(b, b + tf.constant(1.0, shape=[10]))) 35 | saver = tf.compat.v1.train.Saver({'W': W, 'b': b}) 36 | saver.save(sess, './demo/demo-model', global_step=0) 37 | sess.run(tf.compat.v1.assign(b, b + tf.constant(1.0, shape=[10]))) 38 | saver.restore(sess, './demo/demo-model-0') 39 | sess.run(b) 40 | 41 | 三、占位符与操作的使用(2.X版本已经移除了placeholder,使用placeholder定义占位符,使用feed填充占位符) 42 | import tensorflow as tf 43 | tf.compat.v1.disable_eager_execution() 44 | a = tf.constant(123) 45 | b = tf.constant(456) 46 | x = tf.compat.v1.placeholder(tf.int16, shape=()) 47 | y = tf.compat.v1.placeholder(tf.int16, shape=()) 48 | add = tf.add(x, y) 49 | mul = tf.multiply(x, y) 50 | sess = tf.compat.v1.Session() 51 | sess.run(add, feed_dict={x: 10, y: 5}) 52 | sess.run(mul, feed_dict={x: 2, y: 3}) 53 | 54 | 四、查看设备列表 55 | import tensorflow as tf 56 | tf.config.list_physical_devices() 57 | 58 | from tensorflow.python.client import device_lib 59 | dl = device_lib.list_local_devices() 60 | print(dl) 61 | 62 | import tensorflow as tf 63 | print([tf.__version__, tf.test.is_gpu_available()]) 64 | -------------------------------------------------------------------------------- /tensorflow/1-机器学习基础.ipynb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorflow/10-模型定义与查看.ipynb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorflow/4-数据存储DataStore访问.ipynb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorflow/5-注册数据集Dataset.ipynb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorflow/MLOps流水线参考.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/MLOps流水线参考.png -------------------------------------------------------------------------------- /tensorflow/README: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tensorflow/env/README: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tensorflow/env/aml-demo-conda-dependencies.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorflow/requests_futures使用参考.txt: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | from requests_futures.sessions import FuturesSession 3 | from concurrent.futures import as_completed 4 | 5 | 6 | session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10)) 7 | 8 | futures=[session.post(f'http://httpbin.org/post', json={"name": "helloworld-" + str(i)}, headers={"Content-Type":"application/json"}) for i in range(3)] 9 | 10 | for future in as_completed(futures): 11 | resp = future.result() 12 | print(resp.text) 13 | 14 | -------------------------------------------------------------------------------- /tensorflow/src/README: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tensorflow/src/job-dist.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorflow/src/job.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorflow/src/train.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorflow/性能测试.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/tensorflow/性能测试.png -------------------------------------------------------------------------------- /我的书签.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junneyang/xxhadoop/5bbd2c7b74929ae41b755600e0d7e4698d281f39/我的书签.rar --------------------------------------------------------------------------------