├── notes
│   ├── 大数据技术栈思维导图.md
│   ├── installation
│   │   ├── Linux下JDK安装.md
│   │   ├── Linux下Flume的安装.md
│   │   ├── Linux下Python安装.md
│   │   └── Storm单机环境搭建.md
│   ├── 资料分享与工具推荐.md
│   ├── 大数据常用软件安装指南.md
│   └── Azkaban简介.md
├── pictures
├── flink.png
├── flume.png
├── hbase.jpg
├── hbase.png
├── hive.jpg
├── kafka.png
├── oozie.jpg
├── scala.jpg
├── spark.jpg
├── sqoop.png
├── storm.png
├── azkaban.png
├── hadoop.jpg
├── hdfs-机架.png
├── hive-e.png
├── storm集群.png
├── akaban-jps.png
├── azkaban-mr.png
├── blog-logo.png
├── deprecated.png
├── flink-lib.png
├── hadoop安装验证.png
├── hadoop集群规划.png
├── hbase-arc.png
├── hbase集群规划.png
├── hive-emp.png
├── hive-mysql.png
├── hive-n-j.png
├── hive-ouput.png
├── hive体系架构.png
├── ifconfig.png
├── ipconfig.png
├── kafka-BIO.png
├── kafka-分区副本.png
├── kafka-零拷贝.png
├── kafka消费者.png
├── scala-int+.png
├── scala-sdk.png
├── scala-分区数.png
├── scala-视图界定.png
├── scala继承层次.png
├── spark-DAG.png
├── spark-pi.png
├── spark-内存不足.png
├── spark-分区.png
├── spark-累加器1.png
├── spark-累加器2.png
├── spark-运行安全.png
├── spark-集群模式.png
├── spark集群规划.png
├── spout-bolt.png
├── sql-join.jpg
├── sqoop-help.png
├── sqoop-tool.png
├── storm-flow.png
├── storm-jar.png
├── storm-jar2.png
├── storm-lib.png
├── storm-集群规划.png
├── storm集群搭建2.png
├── yarn-base.png
├── yarn工作原理.png
├── yarn工作原理简图.png
├── zookeeper.jpg
├── 大数据处理简化流程.png
├── 大数据技术栈思维导图.png
├── azkaban-edit.png
├── azkaban-hdfs.png
├── azkaban-hive.png
├── azkaban-log.png
├── azkaban-web.png
├── azkaban-zip.png
├── flink-maven.png
├── flink-stack.png
├── flume-kafka.png
├── flume-retry.png
├── hadoop-集群搭建2.png
├── hadoop-集群搭建3.png
├── hadoop高可用集群1.png
├── hadoop高可用集群2.png
├── hadoop高可用集群3.png
├── hadoop高可用集群4.png
├── hadoop高可用集群5.png
├── hbase-60010.png
├── hbase-fliter.png
├── hbase-hadoop.png
├── hbase-web-ui.png
├── hbase-集群搭建1.png
├── hbase-集群搭建2.png
├── hdfs-read-1.jpg
├── hdfs-write-1.jpg
├── hdfs-write-2.jpg
├── hdfs-write-3.jpg
├── hive-beeline.png
├── hive-emp-ptn.png
├── hive-emp-ts.png
├── kafka-topic.png
├── kafka-元数据请求.png
├── kafka-数据可见性.png
├── mutli-net-ip.png
├── scala-plugin.png
├── scala-select.png
├── scala-shell.png
├── scala-操作符优先级.png
├── scala带有特质的对象.png
├── spark-scheme.png
├── spark-shell.png
├── spark-stack.png
├── spark-unifed.png
├── spark-web-ui.png
├── spark-内存不足2.png
├── spark-累加器方法.png
├── spark-集群搭建1.png
├── spark-集群搭建2.png
├── spark-集群搭建3.png
├── spark-集群搭建4.png
├── spark-集群搭建5.png
├── sqoop_hbase.png
├── storm-bolts.png
├── storm-spouts.png
├── storm-tuples.png
├── storm-web-ui.png
├── storm-集群搭建1.png
├── writetokafka.png
├── 大数据技术栈思维导图.xmind
├── Phoenix-delete.png
├── Phoenix-hadoop.png
├── Phoenix-select.png
├── Phoenix-update.png
├── RegionObserver.png
├── azkaban-execute.png
├── azkaban-flows.png
├── azkaban-memory.png
├── azkaban-setting.png
├── azkaban-simple.png
├── azkaban-upload.png
├── azkaban-web-ui.png
├── flink-Rescaling.png
├── flink-api-stack.png
├── flink-dashboard.png
├── flink-download.png
├── flink-maven-new.png
├── flink-process.png
├── flink-richsink.png
├── flume-example-1.png
├── flume-example-2.png
├── flume-example-3.png
├── flume-example-4.png
├── flume-example-7.png
├── flume-example-8.png
├── flume-example-9.png
├── flume-kafka-01.png
├── flume-kafka-2.png
├── flume-version.png
├── future-of-spark.png
├── hadoop-QJM-同步机制.png
├── hadoop-combiner.png
├── hadoop-yarn安装验证.png
├── hadoop-集群环境搭建.png
├── hadoop高可用集群规划.png
├── hbase-co-unload.png
├── hbase-cp-hdfs.png
├── hbase-cp-lisi.png
├── hbase-cp-load.png
├── hbase-webtable.png
├── hdfsdatanodes.png
├── hive-1-2-view.png
├── hive-data-type.png
├── hive-emp-ts-2.png
├── hive-index-show.png
├── hive-install-2.png
├── hive-order-by.png
├── hive-right-join.png
├── hive-select-emp.png
├── kafka-cluster.png
├── mapreduce-sort.png
├── phoenix-shell.png
├── readfromkafka.png
├── scala-richInt.png
├── spark-download.png
├── spark-sql-shell.png
├── spark-sql-自定义函数.png
├── spark-窄依赖和宽依赖.png
├── spark-运行时类型安全.png
├── sql-hive-arch.png
├── sqoop-hive-hdfs.png
├── sqoop-map-task.png
├── sqoop-mysql-jar.png
├── sqoop-version.png
├── sqoop_hdfs_ls.png
├── storm-list-kill.png
├── storm-streams.png
├── storm-topology.png
├── storm-集群-shell.png
├── streaming-flow.png
├── topology-tasks.png
├── virtualbox启用网络.png
├── zookeeper-super.png
├── HashMap-HashTable.png
├── Stream groupings.png
├── azkaban-successed.png
├── datasourcetohdfs.png
├── flink-keyed-state.png
├── flink-mysql-sink.png
├── flink-scala-shell.png
├── flink-tasks-slots.png
├── flink-word-count.png
├── hadoop-ha高可用集群架构.png
├── hbase-connection.png
├── hbase-coprocessor.png
├── hbase-copy-table.png
├── hbase-region-dis.png
├── hbase-unload-test.png
├── hdfs-tolerance-1.jpg
├── hdfs-tolerance-2.jpg
├── hdfs-tolerance-3.jpg
├── hdfs-tolerance-4.jpg
├── hdfs-tolerance-5.jpg
├── hdfsarchitecture.png
├── hive-beeline-cli.png
├── hive-index-table.png
├── hive-mysql-tables.png
├── idea-scala-2.1.8.png
├── idea-scala-change.png
├── idea-scala-plugin.png
├── idea-scala-select.png
├── kafka-consumer01.png
├── kafka-consumer02.png
├── mapreduceProcess.png
├── phoenix-core-jar.png
├── scala-collection.png
├── scala-hello-world.png
├── spark-flume-input.png
├── spark-getpartnum.png
├── spark-mysql-分区上下限.png
├── spark-reducebykey.png
├── spark-shell-local.png
├── sqoop_hive_error.png
├── sqoop_hive_table.png
├── sqoop_hive_tables.png
├── storm-hdfs-result.png
├── storm-ui-actions.png
├── zookeeper-brocast.jpg
├── zookeeper-cluster.png
├── CustomRedisCountApp.png
├── HBase_table-iteblog.png
├── Phoenix-create-table.png
├── WordCountToHBaseApp.png
├── azkaban-click-edit.png
├── azkaban-dependencies.png
├── azkaban-embeded-flow.png
├── azkaban-hive-result.png
├── azkaban-project-edit.png
├── azkaban-simle-result.png
├── azkaban-task-abcde.png
├── bigdata-notes-icon.png
├── bigdata-notes-icon.psd
├── curator-retry-policy.png
├── flink-basis-project.png
├── flink-maven-profile.png
├── flink-non-windowed.png
├── flink-operator-state.png
├── flink-subtask-slots.png
├── flink-task-subtask.png
├── flink-yarn-session.png
├── flume-architecture.png
├── flume-consolidation.png
├── hadoop-code-mapping.png
├── hadoop-code-reducer.png
├── hadoop-namenode主备切换.png
├── hadoop-no-combiner.png
├── hadoop-wordcountapp.png
├── hbase-Region-Server.png
├── hbase-compareFilter.png
├── hbase-cp-helloworld.png
├── hbase-region-splite.png
├── hbase-web-ui-phoenix.png
├── hive-emp-deptno-20.png
├── hive-external-table.png
├── hive-hadoop-bucket.png
├── hive-show-database.png
├── hive-view-properties.png
├── kafka-cluster-shell.png
├── kafka-send-messgaes.png
├── mapreduce-combiner.png
├── scala-collection-imm.png
├── scala-collection-m.png
├── spark-aggregateByKey.png
├── spark-dataFrame+RDDs.png
├── spark-flume-console.png
├── spark-shell-web-ui.png
├── spark-streaming-arch.png
├── spark-streaming-flow.png
├── spark-structure-api.png
├── sqoop-hive-location.png
├── sqoop-list-databases.png
├── sqoop-mysql-connect.png
├── sqoop_hive_success.png
├── store-redis-manager.png
├── storm-Redis-Mapper.png
├── storm-baseRichSpout.png
├── storm-baseRichbolt.png
├── storm-hbase-result.png
├── storm-jedicCommands.png
├── storm-kafka-producer.png
├── storm-kafka-receiver.png
├── storm-package-error.png
├── storm-submit-success.png
├── storm-word-count-p.png
├── strom-kafka-consumer.png
├── zookeeper-hadoop001.png
├── zookeeper-hadoop002.png
├── zookeeper-hadoop003.png
├── zookeeper-zkservice.jpg
├── RegionObservers-works.png
├── azkaban-create-project.png
├── azkaban-gradle-wrapper.png
├── azkaban-task-abcde-zip.png
├── flink-on-yarn-session.jpg
├── flink-session-windows.png
├── flink-sliding-windows.png
├── flink-socket-wordcount.png
├── flink-state-management.png
├── flink-stateful-stream.png
├── flink-stream-barriers.png
├── flink-task-parallelism.png
├── flink-tumbling-windows.png
├── flume-multi-agent-flow.png
├── hadoop-rm-ha-overview.png
├── hive-emp-deptno-20-30.png
├── hive-hadoop-mapreducer.png
├── idea-newproject-scala.png
├── jar-with-dependencies.png
├── kafka-compress-message.png
├── kafka-simple-producer.png
├── scala-ordered-ordering.png
├── scala-other-resources.png
├── spark-Logical-Planning.png
├── spark-sql-NATURAL-JOIN.png
├── spring-mybatis-phoenix.png
├── sqoop-version-selected.png
├── storm-wordcounttoredis.png
├── zookeeper-zkcomponents.jpg
├── zookeeper-zknamespace.jpg
├── HADOOP-ECOSYSTEM-Edureka.png
├── Phoenix-java-api-result.png
├── azkaban-embeded-success.png
├── azkaban-gradle-wrapper-2.png
├── flink-bounded-unbounded.png
├── flink-checkpoints-backend.png
├── flink-optional-components.png
├── flink-standalone-cluster.jpg
├── flink-start-cluster-shell.png
├── flink-window-word-count.png
├── hadoop-code-partitation.png
├── hbase-bytearraycomparable.png
├── hbase-filterbase-subclass.png
├── hive-hadoop-partitation.png
├── kafka-producer-consumer.png
├── mapreduce-with-combiners.png
├── spark-Physical-Planning.png
├── spark-Standalone-web-ui.png
├── spark-streaming-flume-jar.png
├── storm-abstractRedisBolt.png
├── storm-jar-complie-error.png
├── storm-word-count-console.png
├── virtualbox-multi-network.png
├── Figure3Architecture-of-YARN.png
├── HBaseArchitecture-Blog-Fig1.png
├── HBaseArchitecture-Blog-Fig2.png
├── HBaseArchitecture-Blog-Fig3.png
├── HBaseArchitecture-Blog-Fig4.png
├── HBaseArchitecture-Blog-Fig5.png
├── HBaseArchitecture-Blog-Fig6.png
├── HBaseArchitecture-Blog-Fig7.png
├── flink-operator-state-para1.png
├── flink-operator-state-para2.png
├── flink-standalone-cluster-ha.png
├── flume-multiplexing-the-flow.png
├── mapreduce-without-combiners.png
├── spark-streaming-dstream-ops.png
├── spring-boot-mybatis-phoenix.png
├── 01_data_at_rest_infrastructure.png
├── HDFS-HA-Architecture-Edureka.png
├── flink-application-submission.png
├── flink-kafka-datasource-console.png
├── flink-kafka-producer-consumer.png
├── flink-socket-wordcount-stdout.png
├── flink-standalone-cluster-jps.png
├── spark-Big-table–to–big-table.png
├── spark-Big-table–to–small-table.png
├── spark-straming-kafka-console.png
├── spark-streaming-word-count-v1.png
├── spark-streaming-word-count-v2.png
├── spark-streaming-word-count-v3.png
├── Internal-Working-of-Apache-Storm.png
├── flink-RichParallelSourceFunction.png
├── flink-kafka-datasource-producer.png
├── 02_stream_processing_infrastructure.png
├── hadoop-wordcountcombinerpartition.png
├── Detailed-Hadoop-MapReduce-Data-Flow-14.png
└── relationships-worker-processes-executors-tasks.png
├── code
├── Flink
│ ├── flink-basis-scala
│ │ └── src
│ │ │ └── main
│ │ │ ├── resources
│ │ │ ├── wordcount.txt
│ │ │ └── log4j.properties
│ │ │ └── scala
│ │ │ └── com
│ │ │ └── heibaiying
│ │ │ ├── WordCountBatch.scala
│ │ │ └── WordCountStreaming.scala
│ ├── flink-basis-java
│ │ └── src
│ │ │ └── main
│ │ │ ├── java
│ │ │ └── com
│ │ │ │ └── heibaiying
│ │ │ │ └── StreamingJob.java
│ │ │ └── resources
│ │ │ └── log4j.properties
│ ├── flink-kafka-integration
│ │ └── src
│ │ │ └── main
│ │ │ ├── java
│ │ │ └── com
│ │ │ │ └── heibaiying
│ │ │ │ ├── bean
│ │ │ │ └── Employee.java
│ │ │ │ ├── CustomSinkJob.java
│ │ │ │ └── sink
│ │ │ │ └── FlinkToMySQLSink.java
│ │ │ └── resources
│ │ │ └── log4j.properties
│ └── flink-state-management
│ │ └── src
│ │ └── main
│ │ ├── java
│ │ └── com
│ │ │ └── heibaiying
│ │ │ ├── keyedstate
│ │ │ ├── KeyedStateJob.java
│ │ │ └── ThresholdWarning.java
│ │ │ └── operatorstate
│ │ │ └── OperatorStateJob.java
│ │ └── resources
│ │ └── log4j.properties
├── Phoenix
│ ├── spring-mybatis-phoenix
│ │ └── src
│ │ │ ├── main
│ │ │ ├── resources
│ │ │ │ ├── jdbc.properties
│ │ │ │ ├── mybatisConfig.xml
│ │ │ │ ├── mappers
│ │ │ │ │ └── Population.xml
│ │ │ │ └── springApplication.xml
│ │ │ └── java
│ │ │ │ └── com
│ │ │ │ └── heibaiying
│ │ │ │ ├── bean
│ │ │ │ └── USPopulation.java
│ │ │ │ └── dao
│ │ │ │ └── PopulationDao.java
│ │ │ └── test
│ │ │ └── java
│ │ │ └── com
│ │ │ └── heibaiying
│ │ │ └── dao
│ │ │ └── PopulationDaoTest.java
│ └── spring-boot-mybatis-phoenix
│ │ └── src
│ │ ├── main
│ │ ├── java
│ │ │ └── com
│ │ │ │ └── heibaiying
│ │ │ │ └── springboot
│ │ │ │ ├── bean
│ │ │ │ └── USPopulation.java
│ │ │ │ ├── SpringBootMybatisApplication.java
│ │ │ │ └── dao
│ │ │ │ └── PopulationDao.java
│ │ └── resources
│ │ │ └── application.yml
│ │ └── test
│ │ └── java
│ │ └── com
│ │ └── heibaiying
│ │ └── springboot
│ │ └── PopulationTest.java
├── Hadoop
│ ├── hadoop-word-count
│ │ ├── src
│ │ │ └── main
│ │ │ │ ├── resources
│ │ │ │ └── log4j.properties
│ │ │ │ └── java
│ │ │ │ └── com
│ │ │ │ └── heibaiying
│ │ │ │ └── component
│ │ │ │ ├── CustomPartitioner.java
│ │ │ │ ├── WordCountReducer.java
│ │ │ │ └── WordCountMapper.java
│ │ └── pom.xml
│ └── hdfs-java-api
│ │ └── pom.xml
├── spark
│ ├── spark-streaming-basis
│ │ ├── src
│ │ │ └── main
│ │ │ │ └── java
│ │ │ │ └── com
│ │ │ │ └── heibaiying
│ │ │ │ ├── NetworkWordCount.scala
│ │ │ │ ├── utils
│ │ │ │ └── JedisPoolUtil.java
│ │ │ │ ├── NetworkWordCountToRedis.scala
│ │ │ │ └── NetworkWordCountV2.scala
│ │ └── pom.xml
│ ├── spark-streaming-flume
│ │ └── src
│ │ │ └── main
│ │ │ └── scala
│ │ │ └── com
│ │ │ └── heibaiying
│ │ │ └── flume
│ │ │ ├── PushBasedWordCount.scala
│ │ │ └── PullBasedWordCount.scala
│ └── spark-streaming-kafka
│ │ ├── pom.xml
│ │ └── src
│ │ └── main
│ │ └── scala
│ │ └── com
│ │ └── heibaiying
│ │ └── kafka
│ │ └── KafkaDirectStream.scala
├── Kafka
│ └── kafka-basis
│ │ ├── src
│ │ └── main
│ │ │ └── java
│ │ │ └── com
│ │ │ └── heibaiying
│ │ │ ├── producers
│ │ │ ├── partitioners
│ │ │ │ └── CustomPartitioner.java
│ │ │ ├── SimpleProducer.java
│ │ │ ├── ProducerWithPartitioner.java
│ │ │ ├── ProducerASyn.java
│ │ │ └── ProducerSyn.java
│ │ │ └── consumers
│ │ │ ├── ConsumerSyn.java
│ │ │ ├── ConsumerGroup.java
│ │ │ ├── ConsumerASynAndSyn.java
│ │ │ └── ConsumerASyn.java
│ │ └── pom.xml
├── Hbase
│ ├── hbase-observer-coprocessor
│ │ ├── pom.xml
│ │ └── src
│ │ │ └── main
│ │ │ └── java
│ │ │ └── com
│ │ │ └── heibaiying
│ │ │ └── AppendRegionObserver.java
│ ├── hbase-java-api-2.x
│ │ └── pom.xml
│ └── hbase-java-api-1.x
│ │ └── pom.xml
├── Storm
│ ├── storm-word-count
│ │ ├── src
│ │ │ └── main
│ │ │ │ ├── resources
│ │ │ │ └── assembly.xml
│ │ │ │ └── java
│ │ │ │ └── com
│ │ │ │ └── heibaiying
│ │ │ │ └── wordcount
│ │ │ │ ├── LocalWordCountApp.java
│ │ │ │ ├── component
│ │ │ │ ├── SplitBolt.java
│ │ │ │ ├── CountBolt.java
│ │ │ │ └── DataSourceSpout.java
│ │ │ │ └── ClusterWordCountApp.java
│ │ └── pom.xml
│ ├── storm-redis-integration
│ │ └── src
│ │ │ └── main
│ │ │ └── java
│ │ │ └── com
│ │ │ └── heibaiying
│ │ │ └── component
│ │ │ ├── WordCountStoreMapper.java
│ │ │ ├── SplitBolt.java
│ │ │ ├── CountBolt.java
│ │ │ └── DataSourceSpout.java
│ ├── storm-kafka-integration
│ │ └── src
│ │ │ └── main
│ │ │ └── java
│ │ │ └── com
│ │ │ └── heibaiying
│ │ │ └── kafka
│ │ │ ├── read
│ │ │ └── LogConsoleBolt.java
│ │ │ └── write
│ │ │ └── DataSourceSpout.java
│ ├── storm-hbase-integration
│ │ └── src
│ │ │ └── main
│ │ │ └── java
│ │ │ └── com
│ │ │ └── heibaiying
│ │ │ └── component
│ │ │ ├── SplitBolt.java
│ │ │ ├── CountBolt.java
│ │ │ └── DataSourceSpout.java
│ └── storm-hdfs-integration
│ │ └── src
│ │ └── main
│ │ └── java
│ │ └── com.heibaiying
│ │ └── component
│ │ └── DataSourceSpout.java
└── Zookeeper
│ └── curator
│ └── pom.xml
├── resources
├── orc
│ └── dept.orc
├── csv
│ └── dept.csv
├── tsv
│ ├── dept.tsv
│ └── emp.tsv
├── txt
│ ├── dept.txt
│ └── emp.txt
├── parquet
│ ├── dept.parquet
│ └── emp.parquet
├── mysql-connector-java-5.1.47.jar
└── json
│ ├── dept.json
│ └── emp.json
└── .gitignore
/notes/大数据技术栈思维导图.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/pictures/flink.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink.png
--------------------------------------------------------------------------------
/pictures/flume.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume.png
--------------------------------------------------------------------------------
/pictures/hbase.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase.jpg
--------------------------------------------------------------------------------
/pictures/hbase.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase.png
--------------------------------------------------------------------------------
/pictures/hive.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive.jpg
--------------------------------------------------------------------------------
/pictures/kafka.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka.png
--------------------------------------------------------------------------------
/pictures/oozie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/oozie.jpg
--------------------------------------------------------------------------------
/pictures/scala.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala.jpg
--------------------------------------------------------------------------------
/pictures/spark.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark.jpg
--------------------------------------------------------------------------------
/pictures/sqoop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop.png
--------------------------------------------------------------------------------
/pictures/storm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm.png
--------------------------------------------------------------------------------
/code/Flink/flink-basis-scala/src/main/resources/wordcount.txt:
--------------------------------------------------------------------------------
1 | a,a,a,a,a
2 | b,b,b
3 | c,c
4 | d,d
5 |
--------------------------------------------------------------------------------
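The wordcount.txt sample above is the input for the WordCountBatch.scala and WordCountStreaming.scala programs listed under code/Flink/flink-basis-scala in the tree. As a rough, hypothetical sketch (not the repository's actual source), a Flink 1.x batch job that counts the comma-separated tokens in such a file could look like the following; the class name and input path are assumptions:

import org.apache.flink.api.scala._

object WordCountBatchSketch {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    // Hypothetical path; point it at wherever wordcount.txt actually lives.
    val text = env.readTextFile("src/main/resources/wordcount.txt")
    val counts = text
      .flatMap(_.split(","))   // lines such as "a,a,a,a,a"
      .filter(_.nonEmpty)
      .map((_, 1))
      .groupBy(0)
      .sum(1)
    counts.print()             // e.g. (a,5), (b,3), (c,2), (d,2)
  }
}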
/pictures/azkaban.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban.png
--------------------------------------------------------------------------------
/pictures/hadoop.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop.jpg
--------------------------------------------------------------------------------
/pictures/hdfs-机架.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-机架.png
--------------------------------------------------------------------------------
/pictures/hive-e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-e.png
--------------------------------------------------------------------------------
/pictures/storm集群.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm集群.png
--------------------------------------------------------------------------------
/pictures/akaban-jps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/akaban-jps.png
--------------------------------------------------------------------------------
/pictures/azkaban-mr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-mr.png
--------------------------------------------------------------------------------
/pictures/blog-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/blog-logo.png
--------------------------------------------------------------------------------
/pictures/deprecated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/deprecated.png
--------------------------------------------------------------------------------
/pictures/flink-lib.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-lib.png
--------------------------------------------------------------------------------
/pictures/hadoop安装验证.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop安装验证.png
--------------------------------------------------------------------------------
/pictures/hadoop集群规划.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop集群规划.png
--------------------------------------------------------------------------------
/pictures/hbase-arc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-arc.png
--------------------------------------------------------------------------------
/pictures/hbase集群规划.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase集群规划.png
--------------------------------------------------------------------------------
/pictures/hive-emp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-emp.png
--------------------------------------------------------------------------------
/pictures/hive-mysql.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-mysql.png
--------------------------------------------------------------------------------
/pictures/hive-n-j.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-n-j.png
--------------------------------------------------------------------------------
/pictures/hive-ouput.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-ouput.png
--------------------------------------------------------------------------------
/pictures/hive体系架构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive体系架构.png
--------------------------------------------------------------------------------
/pictures/ifconfig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/ifconfig.png
--------------------------------------------------------------------------------
/pictures/ipconfig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/ipconfig.png
--------------------------------------------------------------------------------
/pictures/kafka-BIO.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-BIO.png
--------------------------------------------------------------------------------
/pictures/kafka-分区副本.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-分区副本.png
--------------------------------------------------------------------------------
/pictures/kafka-零拷贝.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-零拷贝.png
--------------------------------------------------------------------------------
/pictures/kafka消费者.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka消费者.png
--------------------------------------------------------------------------------
/pictures/scala-int+.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-int+.png
--------------------------------------------------------------------------------
/pictures/scala-sdk.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-sdk.png
--------------------------------------------------------------------------------
/pictures/scala-分区数.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-分区数.png
--------------------------------------------------------------------------------
/pictures/scala-视图界定.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-视图界定.png
--------------------------------------------------------------------------------
/pictures/scala继承层次.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala继承层次.png
--------------------------------------------------------------------------------
/pictures/spark-DAG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-DAG.png
--------------------------------------------------------------------------------
/pictures/spark-pi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-pi.png
--------------------------------------------------------------------------------
/pictures/spark-内存不足.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-内存不足.png
--------------------------------------------------------------------------------
/pictures/spark-分区.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-分区.png
--------------------------------------------------------------------------------
/pictures/spark-累加器1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-累加器1.png
--------------------------------------------------------------------------------
/pictures/spark-累加器2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-累加器2.png
--------------------------------------------------------------------------------
/pictures/spark-运行安全.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-运行安全.png
--------------------------------------------------------------------------------
/pictures/spark-集群模式.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-集群模式.png
--------------------------------------------------------------------------------
/pictures/spark集群规划.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark集群规划.png
--------------------------------------------------------------------------------
/pictures/spout-bolt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spout-bolt.png
--------------------------------------------------------------------------------
/pictures/sql-join.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sql-join.jpg
--------------------------------------------------------------------------------
/pictures/sqoop-help.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-help.png
--------------------------------------------------------------------------------
/pictures/sqoop-tool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-tool.png
--------------------------------------------------------------------------------
/pictures/storm-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-flow.png
--------------------------------------------------------------------------------
/pictures/storm-jar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-jar.png
--------------------------------------------------------------------------------
/pictures/storm-jar2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-jar2.png
--------------------------------------------------------------------------------
/pictures/storm-lib.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-lib.png
--------------------------------------------------------------------------------
/pictures/storm-集群规划.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-集群规划.png
--------------------------------------------------------------------------------
/pictures/storm集群搭建2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm集群搭建2.png
--------------------------------------------------------------------------------
/pictures/yarn-base.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/yarn-base.png
--------------------------------------------------------------------------------
/pictures/yarn工作原理.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/yarn工作原理.png
--------------------------------------------------------------------------------
/pictures/yarn工作原理简图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/yarn工作原理简图.png
--------------------------------------------------------------------------------
/pictures/zookeeper.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper.jpg
--------------------------------------------------------------------------------
/pictures/大数据处理简化流程.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/大数据处理简化流程.png
--------------------------------------------------------------------------------
/pictures/大数据技术栈思维导图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/大数据技术栈思维导图.png
--------------------------------------------------------------------------------
/resources/orc/dept.orc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/resources/orc/dept.orc
--------------------------------------------------------------------------------
/pictures/azkaban-edit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-edit.png
--------------------------------------------------------------------------------
/pictures/azkaban-hdfs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-hdfs.png
--------------------------------------------------------------------------------
/pictures/azkaban-hive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-hive.png
--------------------------------------------------------------------------------
/pictures/azkaban-log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-log.png
--------------------------------------------------------------------------------
/pictures/azkaban-web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-web.png
--------------------------------------------------------------------------------
/pictures/azkaban-zip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-zip.png
--------------------------------------------------------------------------------
/pictures/flink-maven.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-maven.png
--------------------------------------------------------------------------------
/pictures/flink-stack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-stack.png
--------------------------------------------------------------------------------
/pictures/flume-kafka.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-kafka.png
--------------------------------------------------------------------------------
/pictures/flume-retry.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-retry.png
--------------------------------------------------------------------------------
/pictures/hadoop-集群搭建2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-集群搭建2.png
--------------------------------------------------------------------------------
/pictures/hadoop-集群搭建3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-集群搭建3.png
--------------------------------------------------------------------------------
/pictures/hadoop高可用集群1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop高可用集群1.png
--------------------------------------------------------------------------------
/pictures/hadoop高可用集群2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop高可用集群2.png
--------------------------------------------------------------------------------
/pictures/hadoop高可用集群3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop高可用集群3.png
--------------------------------------------------------------------------------
/pictures/hadoop高可用集群4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop高可用集群4.png
--------------------------------------------------------------------------------
/pictures/hadoop高可用集群5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop高可用集群5.png
--------------------------------------------------------------------------------
/pictures/hbase-60010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-60010.png
--------------------------------------------------------------------------------
/pictures/hbase-fliter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-fliter.png
--------------------------------------------------------------------------------
/pictures/hbase-hadoop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-hadoop.png
--------------------------------------------------------------------------------
/pictures/hbase-web-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-web-ui.png
--------------------------------------------------------------------------------
/pictures/hbase-集群搭建1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-集群搭建1.png
--------------------------------------------------------------------------------
/pictures/hbase-集群搭建2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-集群搭建2.png
--------------------------------------------------------------------------------
/pictures/hdfs-read-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-read-1.jpg
--------------------------------------------------------------------------------
/pictures/hdfs-write-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-write-1.jpg
--------------------------------------------------------------------------------
/pictures/hdfs-write-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-write-2.jpg
--------------------------------------------------------------------------------
/pictures/hdfs-write-3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-write-3.jpg
--------------------------------------------------------------------------------
/pictures/hive-beeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-beeline.png
--------------------------------------------------------------------------------
/pictures/hive-emp-ptn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-emp-ptn.png
--------------------------------------------------------------------------------
/pictures/hive-emp-ts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-emp-ts.png
--------------------------------------------------------------------------------
/pictures/kafka-topic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-topic.png
--------------------------------------------------------------------------------
/pictures/kafka-元数据请求.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-元数据请求.png
--------------------------------------------------------------------------------
/pictures/kafka-数据可见性.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-数据可见性.png
--------------------------------------------------------------------------------
/pictures/mutli-net-ip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/mutli-net-ip.png
--------------------------------------------------------------------------------
/pictures/scala-plugin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-plugin.png
--------------------------------------------------------------------------------
/pictures/scala-select.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-select.png
--------------------------------------------------------------------------------
/pictures/scala-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-shell.png
--------------------------------------------------------------------------------
/pictures/scala-操作符优先级.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-操作符优先级.png
--------------------------------------------------------------------------------
/pictures/scala带有特质的对象.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala带有特质的对象.png
--------------------------------------------------------------------------------
/pictures/spark-scheme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-scheme.png
--------------------------------------------------------------------------------
/pictures/spark-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-shell.png
--------------------------------------------------------------------------------
/pictures/spark-stack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-stack.png
--------------------------------------------------------------------------------
/pictures/spark-unifed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-unifed.png
--------------------------------------------------------------------------------
/pictures/spark-web-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-web-ui.png
--------------------------------------------------------------------------------
/pictures/spark-内存不足2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-内存不足2.png
--------------------------------------------------------------------------------
/pictures/spark-累加器方法.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-累加器方法.png
--------------------------------------------------------------------------------
/pictures/spark-集群搭建1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-集群搭建1.png
--------------------------------------------------------------------------------
/pictures/spark-集群搭建2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-集群搭建2.png
--------------------------------------------------------------------------------
/pictures/spark-集群搭建3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-集群搭建3.png
--------------------------------------------------------------------------------
/pictures/spark-集群搭建4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-集群搭建4.png
--------------------------------------------------------------------------------
/pictures/spark-集群搭建5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-集群搭建5.png
--------------------------------------------------------------------------------
/pictures/sqoop_hbase.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop_hbase.png
--------------------------------------------------------------------------------
/pictures/storm-bolts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-bolts.png
--------------------------------------------------------------------------------
/pictures/storm-spouts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-spouts.png
--------------------------------------------------------------------------------
/pictures/storm-tuples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-tuples.png
--------------------------------------------------------------------------------
/pictures/storm-web-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-web-ui.png
--------------------------------------------------------------------------------
/pictures/storm-集群搭建1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-集群搭建1.png
--------------------------------------------------------------------------------
/pictures/writetokafka.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/writetokafka.png
--------------------------------------------------------------------------------
/pictures/大数据技术栈思维导图.xmind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/大数据技术栈思维导图.xmind
--------------------------------------------------------------------------------
/pictures/Phoenix-delete.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Phoenix-delete.png
--------------------------------------------------------------------------------
/pictures/Phoenix-hadoop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Phoenix-hadoop.png
--------------------------------------------------------------------------------
/pictures/Phoenix-select.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Phoenix-select.png
--------------------------------------------------------------------------------
/pictures/Phoenix-update.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Phoenix-update.png
--------------------------------------------------------------------------------
/pictures/RegionObserver.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/RegionObserver.png
--------------------------------------------------------------------------------
/pictures/azkaban-execute.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-execute.png
--------------------------------------------------------------------------------
/pictures/azkaban-flows.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-flows.png
--------------------------------------------------------------------------------
/pictures/azkaban-memory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-memory.png
--------------------------------------------------------------------------------
/pictures/azkaban-setting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-setting.png
--------------------------------------------------------------------------------
/pictures/azkaban-simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-simple.png
--------------------------------------------------------------------------------
/pictures/azkaban-upload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-upload.png
--------------------------------------------------------------------------------
/pictures/azkaban-web-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-web-ui.png
--------------------------------------------------------------------------------
/pictures/flink-Rescaling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-Rescaling.png
--------------------------------------------------------------------------------
/pictures/flink-api-stack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-api-stack.png
--------------------------------------------------------------------------------
/pictures/flink-dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-dashboard.png
--------------------------------------------------------------------------------
/pictures/flink-download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-download.png
--------------------------------------------------------------------------------
/pictures/flink-maven-new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-maven-new.png
--------------------------------------------------------------------------------
/pictures/flink-process.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-process.png
--------------------------------------------------------------------------------
/pictures/flink-richsink.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-richsink.png
--------------------------------------------------------------------------------
/pictures/flume-example-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-example-1.png
--------------------------------------------------------------------------------
/pictures/flume-example-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-example-2.png
--------------------------------------------------------------------------------
/pictures/flume-example-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-example-3.png
--------------------------------------------------------------------------------
/pictures/flume-example-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-example-4.png
--------------------------------------------------------------------------------
/pictures/flume-example-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-example-7.png
--------------------------------------------------------------------------------
/pictures/flume-example-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-example-8.png
--------------------------------------------------------------------------------
/pictures/flume-example-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-example-9.png
--------------------------------------------------------------------------------
/pictures/flume-kafka-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-kafka-01.png
--------------------------------------------------------------------------------
/pictures/flume-kafka-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-kafka-2.png
--------------------------------------------------------------------------------
/pictures/flume-version.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-version.png
--------------------------------------------------------------------------------
/pictures/future-of-spark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/future-of-spark.png
--------------------------------------------------------------------------------
/pictures/hadoop-QJM-同步机制.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-QJM-同步机制.png
--------------------------------------------------------------------------------
/pictures/hadoop-combiner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-combiner.png
--------------------------------------------------------------------------------
/pictures/hadoop-yarn安装验证.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-yarn安装验证.png
--------------------------------------------------------------------------------
/pictures/hadoop-集群环境搭建.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-集群环境搭建.png
--------------------------------------------------------------------------------
/pictures/hadoop高可用集群规划.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop高可用集群规划.png
--------------------------------------------------------------------------------
/pictures/hbase-co-unload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-co-unload.png
--------------------------------------------------------------------------------
/pictures/hbase-cp-hdfs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-cp-hdfs.png
--------------------------------------------------------------------------------
/pictures/hbase-cp-lisi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-cp-lisi.png
--------------------------------------------------------------------------------
/pictures/hbase-cp-load.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-cp-load.png
--------------------------------------------------------------------------------
/pictures/hbase-webtable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-webtable.png
--------------------------------------------------------------------------------
/pictures/hdfsdatanodes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfsdatanodes.png
--------------------------------------------------------------------------------
/pictures/hive-1-2-view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-1-2-view.png
--------------------------------------------------------------------------------
/pictures/hive-data-type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-data-type.png
--------------------------------------------------------------------------------
/pictures/hive-emp-ts-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-emp-ts-2.png
--------------------------------------------------------------------------------
/pictures/hive-index-show.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-index-show.png
--------------------------------------------------------------------------------
/pictures/hive-install-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-install-2.png
--------------------------------------------------------------------------------
/pictures/hive-order-by.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-order-by.png
--------------------------------------------------------------------------------
/pictures/hive-right-join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-right-join.png
--------------------------------------------------------------------------------
/pictures/hive-select-emp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-select-emp.png
--------------------------------------------------------------------------------
/pictures/kafka-cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-cluster.png
--------------------------------------------------------------------------------
/pictures/mapreduce-sort.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/mapreduce-sort.png
--------------------------------------------------------------------------------
/pictures/phoenix-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/phoenix-shell.png
--------------------------------------------------------------------------------
/pictures/readfromkafka.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/readfromkafka.png
--------------------------------------------------------------------------------
/pictures/scala-richInt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-richInt.png
--------------------------------------------------------------------------------
/pictures/spark-download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-download.png
--------------------------------------------------------------------------------
/pictures/spark-sql-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-sql-shell.png
--------------------------------------------------------------------------------
/pictures/spark-sql-自定义函数.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-sql-自定义函数.png
--------------------------------------------------------------------------------
/pictures/spark-窄依赖和宽依赖.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-窄依赖和宽依赖.png
--------------------------------------------------------------------------------
/pictures/spark-运行时类型安全.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-运行时类型安全.png
--------------------------------------------------------------------------------
/pictures/sql-hive-arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sql-hive-arch.png
--------------------------------------------------------------------------------
/pictures/sqoop-hive-hdfs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-hive-hdfs.png
--------------------------------------------------------------------------------
/pictures/sqoop-map-task.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-map-task.png
--------------------------------------------------------------------------------
/pictures/sqoop-mysql-jar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-mysql-jar.png
--------------------------------------------------------------------------------
/pictures/sqoop-version.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-version.png
--------------------------------------------------------------------------------
/pictures/sqoop_hdfs_ls.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop_hdfs_ls.png
--------------------------------------------------------------------------------
/pictures/storm-list-kill.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-list-kill.png
--------------------------------------------------------------------------------
/pictures/storm-streams.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-streams.png
--------------------------------------------------------------------------------
/pictures/storm-topology.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-topology.png
--------------------------------------------------------------------------------
/pictures/storm-集群-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-集群-shell.png
--------------------------------------------------------------------------------
/pictures/streaming-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/streaming-flow.png
--------------------------------------------------------------------------------
/pictures/topology-tasks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/topology-tasks.png
--------------------------------------------------------------------------------
/pictures/virtualbox启用网络.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/virtualbox启用网络.png
--------------------------------------------------------------------------------
/pictures/zookeeper-super.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-super.png
--------------------------------------------------------------------------------
/resources/csv/dept.csv:
--------------------------------------------------------------------------------
1 | 10,ACCOUNTING,NEW YORK
2 | 20,RESEARCH,DALLAS
3 | 30,SALES,CHICAGO
4 | 40,OPERATIONS,BOSTON
5 |
--------------------------------------------------------------------------------
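The dept.csv resource above is a small, header-less, comma-delimited department table (department number, name, location). As a minimal sketch of how such a file could be read, the following Spark/Scala snippet is illustrative only: the local master, the relative path resources/csv/dept.csv, and the column names deptno/dname/loc are assumptions, not taken from the repository.

import org.apache.spark.sql.SparkSession

object ReadDeptCsv {
  def main(args: Array[String]): Unit = {
    // Local SparkSession; master and path are assumptions for illustration only
    val spark = SparkSession.builder()
      .appName("ReadDeptCsv")
      .master("local[*]")
      .getOrCreate()

    // dept.csv has no header row, so read it raw and supply column names afterwards
    val dept = spark.read
      .option("header", "false")
      .csv("resources/csv/dept.csv")
      .toDF("deptno", "dname", "loc")

    dept.show()   // expect the four ACCOUNTING/RESEARCH/SALES/OPERATIONS rows
    spark.stop()
  }
}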
/resources/tsv/dept.tsv:
--------------------------------------------------------------------------------
1 | 10 ACCOUNTING NEW YORK
2 | 20 RESEARCH DALLAS
3 | 30 SALES CHICAGO
4 | 40 OPERATIONS BOSTON
5 |
--------------------------------------------------------------------------------
/resources/txt/dept.txt:
--------------------------------------------------------------------------------
1 | 10 ACCOUNTING NEW YORK
2 | 20 RESEARCH DALLAS
3 | 30 SALES CHICAGO
4 | 40 OPERATIONS BOSTON
5 |
--------------------------------------------------------------------------------
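dept.tsv and dept.txt hold the same four rows in tab-delimited form, which is the layout Hive's default text SerDe expects with FIELDS TERMINATED BY '\t'. A sketch of mapping one of them to a Hive table through Spark SQL follows; the HDFS path /tmp/dept/dept.txt, the table name dept, and the availability of Hive support on the classpath are assumptions.

import org.apache.spark.sql.SparkSession

object DeptTxtToHive {
  def main(args: Array[String]): Unit = {
    // Assumes a Hive metastore is reachable and the txt file was copied to HDFS beforehand
    val spark = SparkSession.builder()
      .appName("DeptTxtToHive")
      .enableHiveSupport()
      .getOrCreate()

    // Tab-delimited layout matches ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    spark.sql(
      """CREATE TABLE IF NOT EXISTS dept(
        |  deptno INT,
        |  dname  STRING,
        |  loc    STRING)
        |ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'""".stripMargin)

    spark.sql("LOAD DATA INPATH '/tmp/dept/dept.txt' INTO TABLE dept")
    spark.sql("SELECT * FROM dept").show()
    spark.stop()
  }
}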
/pictures/HashMap-HashTable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HashMap-HashTable.png
--------------------------------------------------------------------------------
/pictures/Stream groupings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Stream groupings.png
(Note: the file name contains a space; when fetching the raw file the space must be percent-encoded, i.e. .../pictures/Stream%20groupings.png.)
--------------------------------------------------------------------------------
/pictures/azkaban-successed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-successed.png
--------------------------------------------------------------------------------
/pictures/datasourcetohdfs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/datasourcetohdfs.png
--------------------------------------------------------------------------------
/pictures/flink-keyed-state.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-keyed-state.png
--------------------------------------------------------------------------------
/pictures/flink-mysql-sink.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-mysql-sink.png
--------------------------------------------------------------------------------
/pictures/flink-scala-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-scala-shell.png
--------------------------------------------------------------------------------
/pictures/flink-tasks-slots.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-tasks-slots.png
--------------------------------------------------------------------------------
/pictures/flink-word-count.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-word-count.png
--------------------------------------------------------------------------------
/pictures/hadoop-ha高可用集群架构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-ha高可用集群架构.png
--------------------------------------------------------------------------------
/pictures/hbase-connection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-connection.png
--------------------------------------------------------------------------------
/pictures/hbase-coprocessor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-coprocessor.png
--------------------------------------------------------------------------------
/pictures/hbase-copy-table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-copy-table.png
--------------------------------------------------------------------------------
/pictures/hbase-region-dis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-region-dis.png
--------------------------------------------------------------------------------
/pictures/hbase-unload-test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-unload-test.png
--------------------------------------------------------------------------------
/pictures/hdfs-tolerance-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-tolerance-1.jpg
--------------------------------------------------------------------------------
/pictures/hdfs-tolerance-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-tolerance-2.jpg
--------------------------------------------------------------------------------
/pictures/hdfs-tolerance-3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-tolerance-3.jpg
--------------------------------------------------------------------------------
/pictures/hdfs-tolerance-4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-tolerance-4.jpg
--------------------------------------------------------------------------------
/pictures/hdfs-tolerance-5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfs-tolerance-5.jpg
--------------------------------------------------------------------------------
/pictures/hdfsarchitecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hdfsarchitecture.png
--------------------------------------------------------------------------------
/pictures/hive-beeline-cli.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-beeline-cli.png
--------------------------------------------------------------------------------
/pictures/hive-index-table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-index-table.png
--------------------------------------------------------------------------------
/pictures/hive-mysql-tables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-mysql-tables.png
--------------------------------------------------------------------------------
/pictures/idea-scala-2.1.8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/idea-scala-2.1.8.png
--------------------------------------------------------------------------------
/pictures/idea-scala-change.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/idea-scala-change.png
--------------------------------------------------------------------------------
/pictures/idea-scala-plugin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/idea-scala-plugin.png
--------------------------------------------------------------------------------
/pictures/idea-scala-select.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/idea-scala-select.png
--------------------------------------------------------------------------------
/pictures/kafka-consumer01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-consumer01.png
--------------------------------------------------------------------------------
/pictures/kafka-consumer02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-consumer02.png
--------------------------------------------------------------------------------
/pictures/mapreduceProcess.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/mapreduceProcess.png
--------------------------------------------------------------------------------
/pictures/phoenix-core-jar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/phoenix-core-jar.png
--------------------------------------------------------------------------------
/pictures/scala-collection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-collection.png
--------------------------------------------------------------------------------
/pictures/scala-hello-world.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-hello-world.png
--------------------------------------------------------------------------------
/pictures/spark-flume-input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-flume-input.png
--------------------------------------------------------------------------------
/pictures/spark-getpartnum.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-getpartnum.png
--------------------------------------------------------------------------------
/pictures/spark-mysql-分区上下限.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-mysql-分区上下限.png
--------------------------------------------------------------------------------
/pictures/spark-reducebykey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-reducebykey.png
--------------------------------------------------------------------------------
/pictures/spark-shell-local.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-shell-local.png
--------------------------------------------------------------------------------
/pictures/sqoop_hive_error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop_hive_error.png
--------------------------------------------------------------------------------
/pictures/sqoop_hive_table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop_hive_table.png
--------------------------------------------------------------------------------
/pictures/sqoop_hive_tables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop_hive_tables.png
--------------------------------------------------------------------------------
/pictures/storm-hdfs-result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-hdfs-result.png
--------------------------------------------------------------------------------
/pictures/storm-ui-actions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-ui-actions.png
--------------------------------------------------------------------------------
/pictures/zookeeper-brocast.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-brocast.jpg
--------------------------------------------------------------------------------
/pictures/zookeeper-cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-cluster.png
--------------------------------------------------------------------------------
/resources/parquet/dept.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/resources/parquet/dept.parquet
--------------------------------------------------------------------------------
/resources/parquet/emp.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/resources/parquet/emp.parquet
--------------------------------------------------------------------------------
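dept.parquet and emp.parquet are binary Parquet resources, so only their raw URLs appear above. As a minimal sketch of how they might be used, the snippet below reads both with Spark; the local paths under resources/parquet/ and the shared join column deptno are assumptions made for illustration.

import org.apache.spark.sql.SparkSession

object ReadDeptParquet {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("ReadDeptParquet")
      .master("local[*]")
      .getOrCreate()

    // Parquet is self-describing, so no schema needs to be supplied
    val dept = spark.read.parquet("resources/parquet/dept.parquet")
    val emp  = spark.read.parquet("resources/parquet/emp.parquet")

    dept.printSchema()
    emp.join(dept, "deptno").show()   // assumes both files share a deptno column
    spark.stop()
  }
}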
/pictures/CustomRedisCountApp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/CustomRedisCountApp.png
--------------------------------------------------------------------------------
/pictures/HBase_table-iteblog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HBase_table-iteblog.png
--------------------------------------------------------------------------------
/pictures/Phoenix-create-table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Phoenix-create-table.png
--------------------------------------------------------------------------------
/pictures/WordCountToHBaseApp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/WordCountToHBaseApp.png
--------------------------------------------------------------------------------
/pictures/azkaban-click-edit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-click-edit.png
--------------------------------------------------------------------------------
/pictures/azkaban-dependencies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-dependencies.png
--------------------------------------------------------------------------------
/pictures/azkaban-embeded-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-embeded-flow.png
--------------------------------------------------------------------------------
/pictures/azkaban-hive-result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-hive-result.png
--------------------------------------------------------------------------------
/pictures/azkaban-project-edit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-project-edit.png
--------------------------------------------------------------------------------
/pictures/azkaban-simle-result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-simle-result.png
--------------------------------------------------------------------------------
/pictures/azkaban-task-abcde.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-task-abcde.png
--------------------------------------------------------------------------------
/pictures/bigdata-notes-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/bigdata-notes-icon.png
--------------------------------------------------------------------------------
/pictures/bigdata-notes-icon.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/bigdata-notes-icon.psd
--------------------------------------------------------------------------------
/pictures/curator-retry-policy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/curator-retry-policy.png
--------------------------------------------------------------------------------
/pictures/flink-basis-project.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-basis-project.png
--------------------------------------------------------------------------------
/pictures/flink-maven-profile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-maven-profile.png
--------------------------------------------------------------------------------
/pictures/flink-non-windowed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-non-windowed.png
--------------------------------------------------------------------------------
/pictures/flink-operator-state.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-operator-state.png
--------------------------------------------------------------------------------
/pictures/flink-subtask-slots.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-subtask-slots.png
--------------------------------------------------------------------------------
/pictures/flink-task-subtask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-task-subtask.png
--------------------------------------------------------------------------------
/pictures/flink-yarn-session.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-yarn-session.png
--------------------------------------------------------------------------------
/pictures/flume-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-architecture.png
--------------------------------------------------------------------------------
/pictures/flume-consolidation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-consolidation.png
--------------------------------------------------------------------------------
/pictures/hadoop-code-mapping.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-code-mapping.png
--------------------------------------------------------------------------------
/pictures/hadoop-code-reducer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-code-reducer.png
--------------------------------------------------------------------------------
/pictures/hadoop-namenode主备切换.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-namenode主备切换.png
--------------------------------------------------------------------------------
/pictures/hadoop-no-combiner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-no-combiner.png
--------------------------------------------------------------------------------
/pictures/hadoop-wordcountapp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-wordcountapp.png
--------------------------------------------------------------------------------
/pictures/hbase-Region-Server.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-Region-Server.png
--------------------------------------------------------------------------------
/pictures/hbase-compareFilter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-compareFilter.png
--------------------------------------------------------------------------------
/pictures/hbase-cp-helloworld.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-cp-helloworld.png
--------------------------------------------------------------------------------
/pictures/hbase-region-splite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-region-splite.png
--------------------------------------------------------------------------------
/pictures/hbase-web-ui-phoenix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-web-ui-phoenix.png
--------------------------------------------------------------------------------
/pictures/hive-emp-deptno-20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-emp-deptno-20.png
--------------------------------------------------------------------------------
/pictures/hive-external-table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-external-table.png
--------------------------------------------------------------------------------
/pictures/hive-hadoop-bucket.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-hadoop-bucket.png
--------------------------------------------------------------------------------
/pictures/hive-show-database.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-show-database.png
--------------------------------------------------------------------------------
/pictures/hive-view-properties.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-view-properties.png
--------------------------------------------------------------------------------
/pictures/kafka-cluster-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-cluster-shell.png
--------------------------------------------------------------------------------
/pictures/kafka-send-messgaes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-send-messgaes.png
--------------------------------------------------------------------------------
/pictures/mapreduce-combiner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/mapreduce-combiner.png
--------------------------------------------------------------------------------
/pictures/scala-collection-imm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-collection-imm.png
--------------------------------------------------------------------------------
/pictures/scala-collection-m.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-collection-m.png
--------------------------------------------------------------------------------
/pictures/spark-aggregateByKey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-aggregateByKey.png
--------------------------------------------------------------------------------
/pictures/spark-dataFrame+RDDs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-dataFrame+RDDs.png
--------------------------------------------------------------------------------
/pictures/spark-flume-console.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-flume-console.png
--------------------------------------------------------------------------------
/pictures/spark-shell-web-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-shell-web-ui.png
--------------------------------------------------------------------------------
/pictures/spark-streaming-arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-streaming-arch.png
--------------------------------------------------------------------------------
/pictures/spark-streaming-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-streaming-flow.png
--------------------------------------------------------------------------------
/pictures/spark-structure-api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-structure-api.png
--------------------------------------------------------------------------------
/pictures/sqoop-hive-location.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-hive-location.png
--------------------------------------------------------------------------------
/pictures/sqoop-list-databases.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-list-databases.png
--------------------------------------------------------------------------------
/pictures/sqoop-mysql-connect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-mysql-connect.png
--------------------------------------------------------------------------------
/pictures/sqoop_hive_success.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop_hive_success.png
--------------------------------------------------------------------------------
/pictures/store-redis-manager.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/store-redis-manager.png
--------------------------------------------------------------------------------
/pictures/storm-Redis-Mapper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-Redis-Mapper.png
--------------------------------------------------------------------------------
/pictures/storm-baseRichSpout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-baseRichSpout.png
--------------------------------------------------------------------------------
/pictures/storm-baseRichbolt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-baseRichbolt.png
--------------------------------------------------------------------------------
/pictures/storm-hbase-result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-hbase-result.png
--------------------------------------------------------------------------------
/pictures/storm-jedicCommands.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-jedicCommands.png
--------------------------------------------------------------------------------
/pictures/storm-kafka-producer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-kafka-producer.png
--------------------------------------------------------------------------------
/pictures/storm-kafka-receiver.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-kafka-receiver.png
--------------------------------------------------------------------------------
/pictures/storm-package-error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-package-error.png
--------------------------------------------------------------------------------
/pictures/storm-submit-success.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-submit-success.png
--------------------------------------------------------------------------------
/pictures/storm-word-count-p.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-word-count-p.png
--------------------------------------------------------------------------------
/pictures/strom-kafka-consumer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/strom-kafka-consumer.png
--------------------------------------------------------------------------------
/pictures/zookeeper-hadoop001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-hadoop001.png
--------------------------------------------------------------------------------
/pictures/zookeeper-hadoop002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-hadoop002.png
--------------------------------------------------------------------------------
/pictures/zookeeper-hadoop003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-hadoop003.png
--------------------------------------------------------------------------------
/pictures/zookeeper-zkservice.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-zkservice.jpg
--------------------------------------------------------------------------------
/pictures/RegionObservers-works.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/RegionObservers-works.png
--------------------------------------------------------------------------------
/pictures/azkaban-create-project.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-create-project.png
--------------------------------------------------------------------------------
/pictures/azkaban-gradle-wrapper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-gradle-wrapper.png
--------------------------------------------------------------------------------
/pictures/azkaban-task-abcde-zip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-task-abcde-zip.png
--------------------------------------------------------------------------------
/pictures/flink-on-yarn-session.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-on-yarn-session.jpg
--------------------------------------------------------------------------------
/pictures/flink-session-windows.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-session-windows.png
--------------------------------------------------------------------------------
/pictures/flink-sliding-windows.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-sliding-windows.png
--------------------------------------------------------------------------------
/pictures/flink-socket-wordcount.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-socket-wordcount.png
--------------------------------------------------------------------------------
/pictures/flink-state-management.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-state-management.png
--------------------------------------------------------------------------------
/pictures/flink-stateful-stream.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-stateful-stream.png
--------------------------------------------------------------------------------
/pictures/flink-stream-barriers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-stream-barriers.png
--------------------------------------------------------------------------------
/pictures/flink-task-parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-task-parallelism.png
--------------------------------------------------------------------------------
/pictures/flink-tumbling-windows.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-tumbling-windows.png
--------------------------------------------------------------------------------
/pictures/flume-multi-agent-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-multi-agent-flow.png
--------------------------------------------------------------------------------
/pictures/hadoop-rm-ha-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-rm-ha-overview.png
--------------------------------------------------------------------------------
/pictures/hive-emp-deptno-20-30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-emp-deptno-20-30.png
--------------------------------------------------------------------------------
/pictures/hive-hadoop-mapreducer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-hadoop-mapreducer.png
--------------------------------------------------------------------------------
/pictures/idea-newproject-scala.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/idea-newproject-scala.png
--------------------------------------------------------------------------------
/pictures/jar-with-dependencies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/jar-with-dependencies.png
--------------------------------------------------------------------------------
/pictures/kafka-compress-message.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-compress-message.png
--------------------------------------------------------------------------------
/pictures/kafka-simple-producer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-simple-producer.png
--------------------------------------------------------------------------------
/pictures/scala-ordered-ordering.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-ordered-ordering.png
--------------------------------------------------------------------------------
/pictures/scala-other-resources.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/scala-other-resources.png
--------------------------------------------------------------------------------
/pictures/spark-Logical-Planning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-Logical-Planning.png
--------------------------------------------------------------------------------
/pictures/spark-sql-NATURAL-JOIN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-sql-NATURAL-JOIN.png
--------------------------------------------------------------------------------
/pictures/spring-mybatis-phoenix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spring-mybatis-phoenix.png
--------------------------------------------------------------------------------
/pictures/sqoop-version-selected.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/sqoop-version-selected.png
--------------------------------------------------------------------------------
/pictures/storm-wordcounttoredis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-wordcounttoredis.png
--------------------------------------------------------------------------------
/pictures/zookeeper-zkcomponents.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-zkcomponents.jpg
--------------------------------------------------------------------------------
/pictures/zookeeper-zknamespace.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/zookeeper-zknamespace.jpg
--------------------------------------------------------------------------------
/pictures/HADOOP-ECOSYSTEM-Edureka.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HADOOP-ECOSYSTEM-Edureka.png
--------------------------------------------------------------------------------
/pictures/Phoenix-java-api-result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Phoenix-java-api-result.png
--------------------------------------------------------------------------------
/pictures/azkaban-embeded-success.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-embeded-success.png
--------------------------------------------------------------------------------
/pictures/azkaban-gradle-wrapper-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/azkaban-gradle-wrapper-2.png
--------------------------------------------------------------------------------
/pictures/flink-bounded-unbounded.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-bounded-unbounded.png
--------------------------------------------------------------------------------
/pictures/flink-checkpoints-backend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-checkpoints-backend.png
--------------------------------------------------------------------------------
/pictures/flink-optional-components.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-optional-components.png
--------------------------------------------------------------------------------
/pictures/flink-standalone-cluster.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-standalone-cluster.jpg
--------------------------------------------------------------------------------
/pictures/flink-start-cluster-shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-start-cluster-shell.png
--------------------------------------------------------------------------------
/pictures/flink-window-word-count.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-window-word-count.png
--------------------------------------------------------------------------------
/pictures/hadoop-code-partitation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-code-partitation.png
--------------------------------------------------------------------------------
/pictures/hbase-bytearraycomparable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-bytearraycomparable.png
--------------------------------------------------------------------------------
/pictures/hbase-filterbase-subclass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hbase-filterbase-subclass.png
--------------------------------------------------------------------------------
/pictures/hive-hadoop-partitation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hive-hadoop-partitation.png
--------------------------------------------------------------------------------
/pictures/kafka-producer-consumer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/kafka-producer-consumer.png
--------------------------------------------------------------------------------
/pictures/mapreduce-with-combiners.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/mapreduce-with-combiners.png
--------------------------------------------------------------------------------
/pictures/spark-Physical-Planning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-Physical-Planning.png
--------------------------------------------------------------------------------
/pictures/spark-Standalone-web-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-Standalone-web-ui.png
--------------------------------------------------------------------------------
/pictures/spark-streaming-flume-jar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-streaming-flume-jar.png
--------------------------------------------------------------------------------
/pictures/storm-abstractRedisBolt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-abstractRedisBolt.png
--------------------------------------------------------------------------------
/pictures/storm-jar-complie-error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-jar-complie-error.png
--------------------------------------------------------------------------------
/pictures/storm-word-count-console.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/storm-word-count-console.png
--------------------------------------------------------------------------------
/pictures/virtualbox-multi-network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/virtualbox-multi-network.png
--------------------------------------------------------------------------------
/pictures/Figure3Architecture-of-YARN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Figure3Architecture-of-YARN.png
--------------------------------------------------------------------------------
/pictures/HBaseArchitecture-Blog-Fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HBaseArchitecture-Blog-Fig1.png
--------------------------------------------------------------------------------
/pictures/HBaseArchitecture-Blog-Fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HBaseArchitecture-Blog-Fig2.png
--------------------------------------------------------------------------------
/pictures/HBaseArchitecture-Blog-Fig3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HBaseArchitecture-Blog-Fig3.png
--------------------------------------------------------------------------------
/pictures/HBaseArchitecture-Blog-Fig4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HBaseArchitecture-Blog-Fig4.png
--------------------------------------------------------------------------------
/pictures/HBaseArchitecture-Blog-Fig5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HBaseArchitecture-Blog-Fig5.png
--------------------------------------------------------------------------------
/pictures/HBaseArchitecture-Blog-Fig6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HBaseArchitecture-Blog-Fig6.png
--------------------------------------------------------------------------------
/pictures/HBaseArchitecture-Blog-Fig7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HBaseArchitecture-Blog-Fig7.png
--------------------------------------------------------------------------------
/pictures/flink-operator-state-para1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-operator-state-para1.png
--------------------------------------------------------------------------------
/pictures/flink-operator-state-para2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-operator-state-para2.png
--------------------------------------------------------------------------------
/pictures/flink-standalone-cluster-ha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-standalone-cluster-ha.png
--------------------------------------------------------------------------------
/pictures/flume-multiplexing-the-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flume-multiplexing-the-flow.png
--------------------------------------------------------------------------------
/pictures/mapreduce-without-combiners.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/mapreduce-without-combiners.png
--------------------------------------------------------------------------------
/pictures/spark-streaming-dstream-ops.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-streaming-dstream-ops.png
--------------------------------------------------------------------------------
/pictures/spring-boot-mybatis-phoenix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spring-boot-mybatis-phoenix.png
--------------------------------------------------------------------------------
/pictures/01_data_at_rest_infrastructure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/01_data_at_rest_infrastructure.png
--------------------------------------------------------------------------------
/pictures/HDFS-HA-Architecture-Edureka.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/HDFS-HA-Architecture-Edureka.png
--------------------------------------------------------------------------------
/pictures/flink-application-submission.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-application-submission.png
--------------------------------------------------------------------------------
/pictures/flink-kafka-datasource-console.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-kafka-datasource-console.png
--------------------------------------------------------------------------------
/pictures/flink-kafka-producer-consumer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-kafka-producer-consumer.png
--------------------------------------------------------------------------------
/pictures/flink-socket-wordcount-stdout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-socket-wordcount-stdout.png
--------------------------------------------------------------------------------
/pictures/flink-standalone-cluster-jps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-standalone-cluster-jps.png
--------------------------------------------------------------------------------
/pictures/spark-Big-table–to–big-table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-Big-table–to–big-table.png
--------------------------------------------------------------------------------
/pictures/spark-Big-table–to–small-table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-Big-table–to–small-table.png
--------------------------------------------------------------------------------
/pictures/spark-straming-kafka-console.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-straming-kafka-console.png
--------------------------------------------------------------------------------
/pictures/spark-streaming-word-count-v1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-streaming-word-count-v1.png
--------------------------------------------------------------------------------
/pictures/spark-streaming-word-count-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-streaming-word-count-v2.png
--------------------------------------------------------------------------------
/pictures/spark-streaming-word-count-v3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/spark-streaming-word-count-v3.png
--------------------------------------------------------------------------------
/resources/mysql-connector-java-5.1.47.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/resources/mysql-connector-java-5.1.47.jar
--------------------------------------------------------------------------------
/pictures/Internal-Working-of-Apache-Storm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Internal-Working-of-Apache-Storm.png
--------------------------------------------------------------------------------
/pictures/flink-RichParallelSourceFunction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-RichParallelSourceFunction.png
--------------------------------------------------------------------------------
/pictures/flink-kafka-datasource-producer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/flink-kafka-datasource-producer.png
--------------------------------------------------------------------------------
/pictures/02_stream_processing_infrastructure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/02_stream_processing_infrastructure.png
--------------------------------------------------------------------------------
/pictures/hadoop-wordcountcombinerpartition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/hadoop-wordcountcombinerpartition.png
--------------------------------------------------------------------------------
/pictures/Detailed-Hadoop-MapReduce-Data-Flow-14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/Detailed-Hadoop-MapReduce-Data-Flow-14.png
--------------------------------------------------------------------------------
/pictures/relationships-worker-processes-executors-tasks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/pictures/relationships-worker-processes-executors-tasks.png
--------------------------------------------------------------------------------
/code/Phoenix/spring-mybatis-phoenix/src/main/resources/jdbc.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/BigData-Notes/master/code/Phoenix/spring-mybatis-phoenix/src/main/resources/jdbc.properties
--------------------------------------------------------------------------------
/resources/json/dept.json:
--------------------------------------------------------------------------------
1 | {"DEPTNO": 10,"DNAME": "ACCOUNTING","LOC": "NEW YORK"}
2 | {"DEPTNO": 20,"DNAME": "RESEARCH","LOC": "DALLAS"}
3 | {"DEPTNO": 30,"DNAME": "SALES","LOC": "CHICAGO"}
4 | {"DEPTNO": 40,"DNAME": "OPERATIONS","LOC": "BOSTON"}
--------------------------------------------------------------------------------
/code/Phoenix/spring-mybatis-phoenix/src/main/java/com/heibaiying/bean/USPopulation.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.bean;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 |
7 | @Data
8 | @AllArgsConstructor
9 | @NoArgsConstructor
10 | public class USPopulation {
11 |
12 | private String state;
13 | private String city;
14 | private long population;
15 | }
16 |
--------------------------------------------------------------------------------
/code/Hadoop/hadoop-word-count/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO,CONSOLE
2 | log4j.additivity.org.apache=false
3 |
4 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
5 | log4j.appender.CONSOLE.Threshold=INFO
6 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-dd HH\:mm\:ss} -%-4r [%t] %-5p %x - %m%n
7 | log4j.appender.CONSOLE.Target=System.out
8 | log4j.appender.CONSOLE.Encoding=UTF-8
9 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
10 |
--------------------------------------------------------------------------------
/code/Phoenix/spring-boot-mybatis-phoenix/src/main/java/com/heibaiying/springboot/bean/USPopulation.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.springboot.bean;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | import lombok.ToString;
7 |
8 | @Data
9 | @AllArgsConstructor
10 | @NoArgsConstructor
11 | @ToString
12 | public class USPopulation {
13 |
14 | private String state;
15 | private String city;
16 | private long population;
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/code/Phoenix/spring-boot-mybatis-phoenix/src/main/java/com/heibaiying/springboot/SpringBootMybatisApplication.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.springboot;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 |
6 | @SpringBootApplication
7 | public class SpringBootMybatisApplication {
8 |
9 | public static void main(String[] args) {
10 | SpringApplication.run(SpringBootMybatisApplication.class, args);
11 | }
12 |
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/code/Phoenix/spring-mybatis-phoenix/src/main/java/com/heibaiying/dao/PopulationDao.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.dao;
2 |
3 | import com.heibaiying.bean.USPopulation;
4 | import org.apache.ibatis.annotations.Param;
5 |
6 | import java.util.List;
7 |
8 | public interface PopulationDao {
9 |
10 | List<USPopulation> queryAll();
11 |
12 | void save(USPopulation USPopulation);
13 |
14 | USPopulation queryByStateAndCity(@Param("state") String state, @Param("city") String city);
15 |
16 | void deleteByStateAndCity(@Param("state") String state, @Param("city") String city);
17 | }
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *#
2 | *.iml
3 | *.ipr
4 | *.iws
5 | *.sw?
6 | *~
7 | .#*
8 | .*.md.html
9 | .DS_Store
10 | .classpath
11 | .factorypath
12 | .gradle
13 | .idea
14 | .metadata
15 | .project
16 | .recommenders
17 | .settings
18 | .springBeans
19 | /build
20 | MANIFEST.MF
21 | _site/
22 | activemq-data
23 | bin
24 | build
25 | build.log
26 | dependency-reduced-pom.xml
27 | dump.rdb
28 | interpolated*.xml
29 | lib/
30 | manifest.yml
31 | overridedb.*
32 | settings.xml
33 | target
34 | classes
35 | out
36 | logs
37 | transaction-logs
38 | .flattened-pom.xml
39 | secrets.yml
40 | .gradletasknamecache
41 | .sts4-cache
--------------------------------------------------------------------------------
/code/Hadoop/hadoop-word-count/src/main/java/com/heibaiying/component/CustomPartitioner.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import com.heibaiying.utils.WordCountDataUtils;
4 | import org.apache.hadoop.io.IntWritable;
5 | import org.apache.hadoop.io.Text;
6 | import org.apache.hadoop.mapreduce.Partitioner;
7 |
8 | /**
9 | * Custom partitioner: partitions records by word
10 | */
11 | public class CustomPartitioner extends Partitioner<Text, IntWritable> {
12 |
13 | public int getPartition(Text text, IntWritable intWritable, int numPartitions) {
14 | return WordCountDataUtils.WORD_LIST.indexOf(text.toString());
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountBatch.scala:
--------------------------------------------------------------------------------
1 | package com.heibaiying
2 |
3 | import org.apache.flink.api.scala._
4 |
5 | object WordCountBatch {
6 |
7 | def main(args: Array[String]): Unit = {
8 | val benv = ExecutionEnvironment.getExecutionEnvironment
9 | val dataSet = benv.readTextFile("D:\\BigData-Notes\\code\\Flink\\flink-basis-scala\\src\\main\\resources\\wordcount.txt")
10 | dataSet.flatMap { _.toLowerCase.split(",")}
11 | .filter (_.nonEmpty)
12 | .map { (_, 1) }
13 | .groupBy(0)
14 | .sum(1)
15 | .print()
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/code/Phoenix/spring-mybatis-phoenix/src/main/resources/mybatisConfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/code/Hadoop/hadoop-word-count/src/main/java/com/heibaiying/component/WordCountReducer.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.hadoop.io.IntWritable;
4 | import org.apache.hadoop.io.Text;
5 | import org.apache.hadoop.mapreduce.Reducer;
6 |
7 | import java.io.IOException;
8 |
9 | /**
10 | * Performs the word-frequency aggregation
11 | */
12 | public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
13 |
14 | @Override
15 | protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
16 | int count = 0;
17 | for (IntWritable value : values) {
18 | count += value.get();
19 | }
20 | context.write(key, new IntWritable(count));
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/code/spark/spark-streaming-basis/src/main/java/com/heibaiying/NetworkWordCount.scala:
--------------------------------------------------------------------------------
1 | package com.heibaiying
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.streaming.{Seconds, StreamingContext}
5 |
6 | /**
7 | * Word count
8 | */
9 | object NetworkWordCount {
10 |
11 |
12 | def main(args: Array[String]) {
13 |
14 | /* Set the batch interval to 5 seconds */
15 | val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
16 | val ssc = new StreamingContext(sparkConf, Seconds(5))
17 |
18 | /* Create a text input stream and perform the word count */
19 | val lines = ssc.socketTextStream("hadoop001", 9999)
20 | lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _).print()
21 |
22 | /* Start the job */
23 | ssc.start()
24 | /* Wait for termination */
25 | ssc.awaitTermination()
26 |
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/code/Hadoop/hadoop-word-count/src/main/java/com/heibaiying/component/WordCountMapper.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.hadoop.io.IntWritable;
4 | import org.apache.hadoop.io.LongWritable;
5 | import org.apache.hadoop.io.Text;
6 | import org.apache.hadoop.mapreduce.Mapper;
7 |
8 | import java.io.IOException;
9 |
10 | /**
11 | * Splits each line of input on the given delimiter
12 | */
13 | public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
14 |
15 | @Override
16 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
17 | String[] words = value.toString().split("\t");
18 | for (String word : words) {
19 | context.write(new Text(word), new IntWritable(1));
20 | }
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
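The three MapReduce components above — WordCountMapper, WordCountReducer, and CustomPartitioner — are assembled by a driver class that is not part of this excerpt. Below is a minimal driver sketch showing how they could be wired together; the class name, job name, and use of command-line arguments for the input and output paths are illustrative assumptions, not the repository's actual code.

```java
package com.heibaiying;

import com.heibaiying.component.CustomPartitioner;
import com.heibaiying.component.WordCountMapper;
import com.heibaiying.component.WordCountReducer;
import com.heibaiying.utils.WordCountDataUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountApp {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word-count");
        job.setJarByClass(WordCountApp.class);

        // the mapper splits each line on "\t", the reducer sums the per-word counts
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // one reduce task per word in the predefined list, matching CustomPartitioner's indexOf logic
        job.setPartitionerClass(CustomPartitioner.class);
        job.setNumReduceTasks(WordCountDataUtils.WORD_LIST.size());

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
```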
/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountStreaming.scala:
--------------------------------------------------------------------------------
1 | package com.heibaiying
2 |
3 | import org.apache.flink.streaming.api.scala._
4 | import org.apache.flink.streaming.api.windowing.time.Time
5 |
6 |
7 | object WordCountStreaming {
8 |
9 | def main(args: Array[String]): Unit = {
10 |
11 | val senv = StreamExecutionEnvironment.getExecutionEnvironment
12 |
13 | val dataStream: DataStream[String] = senv.socketTextStream("192.168.0.229", 9999, '\n')
14 | dataStream.flatMap { line => line.toLowerCase.split(",") }
15 | .filter(_.nonEmpty)
16 | .map { word => (word, 1) }
17 | .keyBy(0)
18 | .timeWindow(Time.seconds(3))
19 | .sum(1)
20 | .print()
21 | senv.execute("Streaming WordCount")
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/code/spark/spark-streaming-flume/src/main/scala/com/heibaiying/flume/PushBasedWordCount.scala:
--------------------------------------------------------------------------------
1 | package com.heibaiying.flume
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.streaming.{Seconds, StreamingContext}
5 | import org.apache.spark.streaming.flume.FlumeUtils
6 |
7 |
8 | /**
9 | * @author : heibaiying
10 | * Push-based approach to receiving data
11 | */
12 | object PushBasedWordCount {
13 |
14 | def main(args: Array[String]): Unit = {
15 |
16 | val sparkConf = new SparkConf()
17 | val ssc = new StreamingContext(sparkConf, Seconds(5))
18 |
19 | // 1. Get the input stream
20 | val flumeStream = FlumeUtils.createStream(ssc, "hadoop001", 8888)
21 |
22 | // 2. Print the data from the input stream
23 | flumeStream.map(line => new String(line.event.getBody.array()).trim).print()
24 |
25 | ssc.start()
26 | ssc.awaitTermination()
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/code/spark/spark-streaming-flume/src/main/scala/com/heibaiying/flume/PullBasedWordCount.scala:
--------------------------------------------------------------------------------
1 | package com.heibaiying.flume
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.streaming.{Seconds, StreamingContext}
5 | import org.apache.spark.streaming.flume.FlumeUtils
6 |
7 | /**
8 | * @author : heibaiying
9 | * Pull-based approach that uses a custom receiver to fetch data
10 | */
11 | object PullBasedWordCount {
12 |
13 | def main(args: Array[String]): Unit = {
14 |
15 | val sparkConf = new SparkConf()
16 | val ssc = new StreamingContext(sparkConf, Seconds(5))
17 |
18 | // 1. Get the input stream
19 | val flumeStream = FlumeUtils.createPollingStream(ssc, "hadoop001", 8888)
20 |
21 | // 2. Print the data in the input stream
22 | flumeStream.map(line => new String(line.event.getBody.array()).trim).print()
23 |
24 | ssc.start()
25 | ssc.awaitTermination()
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/resources/tsv/emp.tsv:
--------------------------------------------------------------------------------
1 | 7369 SMITH CLERK 7902 1980-12-17 00:00:00 800.00 20
2 | 7499 ALLEN SALESMAN 7698 1981-02-20 00:00:00 1600.00 300.00 30
3 | 7521 WARD SALESMAN 7698 1981-02-22 00:00:00 1250.00 500.00 30
4 | 7566 JONES MANAGER 7839 1981-04-02 00:00:00 2975.00 20
5 | 7654 MARTIN SALESMAN 7698 1981-09-28 00:00:00 1250.00 1400.00 30
6 | 7698 BLAKE MANAGER 7839 1981-05-01 00:00:00 2850.00 30
7 | 7782 CLARK MANAGER 7839 1981-06-09 00:00:00 2450.00 10
8 | 7788 SCOTT ANALYST 7566 1987-04-19 00:00:00 1500.00 20
9 | 7839 KING PRESIDENT 1981-11-17 00:00:00 5000.00 10
10 | 7844 TURNER SALESMAN 7698 1981-09-08 00:00:00 1500.00 0.00 30
11 | 7876 ADAMS CLERK 7788 1987-05-23 00:00:00 1100.00 20
12 | 7900 JAMES CLERK 7698 1981-12-03 00:00:00 950.00 30
13 | 7902 FORD ANALYST 7566 1981-12-03 00:00:00 3000.00 20
14 | 7934 MILLER CLERK 7782 1982-01-23 00:00:00 1300.00 10
15 |
--------------------------------------------------------------------------------
/resources/txt/emp.txt:
--------------------------------------------------------------------------------
1 | 7369 SMITH CLERK 7902 1980-12-17 00:00:00 800.00 20
2 | 7499 ALLEN SALESMAN 7698 1981-02-20 00:00:00 1600.00 300.00 30
3 | 7521 WARD SALESMAN 7698 1981-02-22 00:00:00 1250.00 500.00 30
4 | 7566 JONES MANAGER 7839 1981-04-02 00:00:00 2975.00 20
5 | 7654 MARTIN SALESMAN 7698 1981-09-28 00:00:00 1250.00 1400.00 30
6 | 7698 BLAKE MANAGER 7839 1981-05-01 00:00:00 2850.00 30
7 | 7782 CLARK MANAGER 7839 1981-06-09 00:00:00 2450.00 10
8 | 7788 SCOTT ANALYST 7566 1987-04-19 00:00:00 1500.00 20
9 | 7839 KING PRESIDENT 1981-11-17 00:00:00 5000.00 10
10 | 7844 TURNER SALESMAN 7698 1981-09-08 00:00:00 1500.00 0.00 30
11 | 7876 ADAMS CLERK 7788 1987-05-23 00:00:00 1100.00 20
12 | 7900 JAMES CLERK 7698 1981-12-03 00:00:00 950.00 30
13 | 7902 FORD ANALYST 7566 1981-12-03 00:00:00 3000.00 20
14 | 7934 MILLER CLERK 7782 1982-01-23 00:00:00 1300.00 10
15 |
--------------------------------------------------------------------------------
/code/Flink/flink-basis-java/src/main/java/com/heibaiying/StreamingJob.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying;
2 |
3 | import org.apache.flink.api.java.operators.DataSource;
4 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
6 |
7 | public class StreamingJob {
8 |
9 | private static final String ROOT_PATH = "D:\\BigData-Notes\\code\\Flink\\flink-basis-java\\src\\main\\resources\\";
10 |
11 | public static void main(String[] args) throws Exception {
12 |
13 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
14 | DataStreamSource<String> streamSource = env.readTextFile(ROOT_PATH + "log4j.properties");
15 | streamSource.writeAsText(ROOT_PATH + "out").setParallelism(1);
16 | env.execute();
17 |
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/code/Phoenix/spring-boot-mybatis-phoenix/src/main/java/com/heibaiying/springboot/dao/PopulationDao.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.springboot.dao;
2 |
3 | import com.heibaiying.springboot.bean.USPopulation;
4 | import org.apache.ibatis.annotations.*;
5 |
6 | import java.util.List;
7 |
8 | @Mapper
9 | public interface PopulationDao {
10 |
11 | @Select("SELECT * from us_population")
12 | List<USPopulation> queryAll();
13 |
14 | @Insert("UPSERT INTO us_population VALUES( #{state}, #{city}, #{population} )")
15 | void save(USPopulation USPopulation);
16 |
17 | @Select("SELECT * FROM us_population WHERE state=#{state} AND city = #{city}")
18 | USPopulation queryByStateAndCity(String state, String city);
19 |
20 |
21 | @Delete("DELETE FROM us_population WHERE state=#{state} AND city = #{city}")
22 | void deleteByStateAndCity(String state, String city);
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/producers/partitioners/CustomPartitioner.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.producers.partitioners;
2 |
3 | import org.apache.kafka.clients.producer.Partitioner;
4 | import org.apache.kafka.common.Cluster;
5 |
6 | import java.util.Map;
7 |
8 | /**
9 | * Custom partitioner
10 | */
11 | public class CustomPartitioner implements Partitioner {
12 |
13 | private int passLine;
14 |
15 | @Override
16 | public void configure(Map<String, ?> configs) {
17 | passLine = (Integer) configs.get("pass.line");
18 | }
19 |
20 | @Override
21 | public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
22 | return (Integer) key >= passLine ? 1 : 0;
23 | }
24 |
25 | @Override
26 | public void close() {
27 | System.out.println("Partitioner closed");
28 | }
29 |
30 |
31 | }
32 |
--------------------------------------------------------------------------------
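A producer selects this partitioner purely through configuration. The sketch below shows one assumed wiring; the class name, topic, and `pass.line` value are illustrative, and integer keys are used because `partition()` casts the record key to `Integer`.

```java
package com.heibaiying.producers;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class PartitionerProducer {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "hadoop001:9092");
        // keys must be integers because CustomPartitioner casts the key to Integer
        props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // register the custom partitioner and the threshold it reads in configure()
        props.put("partitioner.class", "com.heibaiying.producers.partitioners.CustomPartitioner");
        props.put("pass.line", 6);

        Producer<Integer, String> producer = new KafkaProducer<>(props);
        for (int i = 0; i <= 10; i++) {
            // with the partitioner above, keys >= 6 go to partition 1, the rest to partition 0
            producer.send(new ProducerRecord<>("Kafka-Partitioner-Test", i, "value" + i));
        }
        producer.close();
    }
}
```

The target topic must have at least two partitions for the second partition to receive any records.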
/code/Phoenix/spring-mybatis-phoenix/src/main/resources/mappers/Population.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
11 |
12 |
13 | UPSERT INTO us_population VALUES( #{state}, #{city}, #{population} )
14 |
15 |
16 |
19 |
20 |
21 | DELETE FROM us_population WHERE state=#{state} AND city = #{city}
22 |
23 |
24 |
--------------------------------------------------------------------------------
/code/spark/spark-streaming-basis/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>spark-streaming-basis</artifactId>
9 |     <version>1.0</version>
10 |
11 |     <dependencies>
12 |         <dependency>
13 |             <groupId>org.apache.spark</groupId>
14 |             <artifactId>spark-streaming_2.12</artifactId>
15 |             <version>2.4.3</version>
16 |         </dependency>
17 |         <dependency>
18 |             <groupId>redis.clients</groupId>
19 |             <artifactId>jedis</artifactId>
20 |             <version>2.9.0</version>
21 |         </dependency>
22 |     </dependencies>
23 |
24 | </project>
--------------------------------------------------------------------------------
/code/Hbase/hbase-observer-coprocessor/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>hbase-observer-coprocessor</artifactId>
9 |     <version>1.0-SNAPSHOT</version>
10 |
11 |     <dependencies>
12 |         <dependency>
13 |             <groupId>org.apache.hbase</groupId>
14 |             <artifactId>hbase-common</artifactId>
15 |             <version>1.2.0</version>
16 |         </dependency>
17 |         <dependency>
18 |             <groupId>org.apache.hbase</groupId>
19 |             <artifactId>hbase-server</artifactId>
20 |             <version>1.2.0</version>
21 |         </dependency>
22 |     </dependencies>
23 |
24 | </project>
--------------------------------------------------------------------------------
/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/bean/Employee.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.bean;
2 |
3 | import java.sql.Date;
4 |
5 | public class Employee {
6 |
7 | private String name;
8 | private int age;
9 | private Date birthday;
10 |
11 | Employee(){}
12 |
13 | public Employee(String name, int age, Date birthday) {
14 | this.name = name;
15 | this.age = age;
16 | this.birthday = birthday;
17 | }
18 |
19 | public String getName() {
20 | return name;
21 | }
22 |
23 | public void setName(String name) {
24 | this.name = name;
25 | }
26 |
27 | public int getAge() {
28 | return age;
29 | }
30 |
31 | public void setAge(int age) {
32 | this.age = age;
33 | }
34 |
35 | public Date getBirthday() {
36 | return birthday;
37 | }
38 |
39 | public void setBirthday(Date birthday) {
40 | this.birthday = birthday;
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/CustomSinkJob.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying;
2 |
3 | import com.heibaiying.bean.Employee;
4 | import com.heibaiying.sink.FlinkToMySQLSink;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
7 |
8 | import java.sql.Date;
9 |
10 | public class CustomSinkJob {
11 |
12 | public static void main(String[] args) throws Exception {
13 |
14 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
15 | Date date = new Date(System.currentTimeMillis());
16 | DataStreamSource<Employee> streamSource = env.fromElements(
17 | new Employee("hei", 10, date),
18 | new Employee("bai", 20, date),
19 | new Employee("ying", 30, date));
20 | streamSource.addSink(new FlinkToMySQLSink());
21 | env.execute();
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/code/Storm/storm-word-count/src/main/resources/assembly.xml:
--------------------------------------------------------------------------------
1 | <assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
2 |           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3 |           xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
4 |
5 |     <id>jar-with-dependencies</id>
6 |
7 |     <formats>
8 |         <format>jar</format>
9 |     </formats>
10 |
11 |     <includeBaseDirectory>false</includeBaseDirectory>
12 |
13 |     <dependencySets>
14 |         <dependencySet>
15 |             <outputDirectory>/</outputDirectory>
16 |             <useProjectArtifact>true</useProjectArtifact>
17 |             <unpack>true</unpack>
18 |             <scope>runtime</scope>
19 |             <excludes>
20 |                 <exclude>org.apache.storm:storm-core</exclude>
21 |             </excludes>
22 |         </dependencySet>
23 |     </dependencySets>
24 | </assembly>
--------------------------------------------------------------------------------
/code/spark/spark-streaming-basis/src/main/java/com/heibaiying/utils/JedisPoolUtil.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.utils;
2 |
3 | import redis.clients.jedis.Jedis;
4 | import redis.clients.jedis.JedisPool;
5 | import redis.clients.jedis.JedisPoolConfig;
6 |
7 | public class JedisPoolUtil {
8 |
9 | /* Declared volatile to prevent instruction reordering */
10 | private static volatile JedisPool jedisPool = null;
11 |
12 | private static final String HOST = "localhost";
13 | private static final int PORT = 6379;
14 |
15 |
16 | /* Lazy-initialized singleton via double-checked locking */
17 | public static Jedis getConnection() {
18 | if (jedisPool == null) {
19 | synchronized (JedisPoolUtil.class) {
20 | if (jedisPool == null) {
21 | JedisPoolConfig config = new JedisPoolConfig();
22 | config.setMaxTotal(30);
23 | config.setMaxIdle(10);
24 | jedisPool = new JedisPool(config, HOST, PORT);
25 | }
26 | }
27 | }
28 | return jedisPool.getResource();
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
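A minimal usage sketch for this utility, assuming a Redis instance on localhost:6379; the hash key and field are illustrative. Closing a pooled `Jedis` instance returns it to the pool rather than tearing down the connection.

```java
package com.heibaiying.utils;

import redis.clients.jedis.Jedis;

public class JedisPoolUtilExample {

    public static void main(String[] args) {
        // getConnection() lazily creates the shared pool on first use
        try (Jedis jedis = JedisPoolUtil.getConnection()) {
            // accumulate a word count in a Redis hash
            jedis.hincrBy("wordCount", "hadoop", 1);
            System.out.println(jedis.hgetAll("wordCount"));
        }
    }
}
```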
/code/Storm/storm-redis-integration/src/main/java/com/heibaiying/component/WordCountStoreMapper.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.storm.redis.common.mapper.RedisDataTypeDescription;
4 | import org.apache.storm.redis.common.mapper.RedisStoreMapper;
5 | import org.apache.storm.tuple.ITuple;
6 |
7 | /**
8 | * Defines how tuples are mapped to the data stored in Redis
9 | */
10 | public class WordCountStoreMapper implements RedisStoreMapper {
11 | private RedisDataTypeDescription description;
12 | private final String hashKey = "wordCount";
13 |
14 | public WordCountStoreMapper() {
15 | description = new RedisDataTypeDescription(
16 | RedisDataTypeDescription.RedisDataType.HASH, hashKey);
17 | }
18 |
19 | @Override
20 | public RedisDataTypeDescription getDataTypeDescription() {
21 | return description;
22 | }
23 |
24 | @Override
25 | public String getKeyFromTuple(ITuple tuple) {
26 | return tuple.getStringByField("word");
27 | }
28 |
29 | @Override
30 | public String getValueFromTuple(ITuple tuple) {
31 | return tuple.getStringByField("count");
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
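This mapper is meant to be handed to storm-redis's `RedisStoreBolt`. The snippet below is an assumed wiring sketch — host, port, and component ids are illustrative, and the repository's actual topology class is not shown in this excerpt.

```java
package com.heibaiying.component;

import org.apache.storm.redis.bolt.RedisStoreBolt;
import org.apache.storm.redis.common.config.JedisPoolConfig;
import org.apache.storm.topology.TopologyBuilder;

public class RedisStoreWiring {

    public static void main(String[] args) {
        // connection settings for the Redis instance that will hold the "wordCount" hash
        JedisPoolConfig poolConfig = new JedisPoolConfig.Builder()
                .setHost("localhost")
                .setPort(6379)
                .build();

        // RedisStoreBolt uses WordCountStoreMapper to turn ("word", "count") tuples into hash writes
        RedisStoreBolt storeBolt = new RedisStoreBolt(poolConfig, new WordCountStoreMapper());

        TopologyBuilder builder = new TopologyBuilder();
        builder.setBolt("RedisStoreBolt", storeBolt).shuffleGrouping("CountBolt");
        // spout and counting bolt declarations are omitted in this sketch
    }
}
```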
/notes/installation/Linux下JDK安装.md:
--------------------------------------------------------------------------------
1 | # Installing the JDK on Linux
2 |
3 | >**System environment**: CentOS 7.6
4 | >
5 | >**JDK version**: JDK 1.8.0_201
6 |
7 |
8 |
9 | ### 1. Download and extract
10 |
11 | Download the required version of the JDK from the [official site](https://www.oracle.com/technetwork/java/javase/downloads/index.html). The version used here is [JDK 1.8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html); after downloading, extract the archive:
12 |
13 | ```shell
14 | [root@ java]# tar -zxvf jdk-8u201-linux-x64.tar.gz
15 | ```
16 |
17 |
18 |
19 | ### 2. Configure environment variables
20 |
21 | ```shell
22 | [root@ java]# vi /etc/profile
23 | ```
24 |
25 | Add the following configuration:
26 |
27 | ```shell
28 | export JAVA_HOME=/usr/java/jdk1.8.0_201
29 | export JRE_HOME=${JAVA_HOME}/jre
30 | export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
31 | export PATH=${JAVA_HOME}/bin:$PATH
32 | ```
33 |
34 | Run the `source` command so the configuration takes effect immediately:
35 |
36 | ```shell
37 | [root@ java]# source /etc/profile
38 | ```
39 |
40 |
41 |
42 | ### 3. Verify the installation
43 |
44 | ```shell
45 | [root@ java]# java -version
46 | ```
47 |
48 | If the corresponding version information is displayed, the installation was successful.
49 |
50 | ```shell
51 | java version "1.8.0_201"
52 | Java(TM) SE Runtime Environment (build 1.8.0_201-b09)
53 | Java HotSpot(TM) 64-Bit Server VM (build 25.201-b09, mixed mode)
54 |
55 | ```
56 |
--------------------------------------------------------------------------------
/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/LocalWordCountApp.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.wordcount;
2 |
3 | import com.heibaiying.wordcount.component.CountBolt;
4 | import com.heibaiying.wordcount.component.DataSourceSpout;
5 | import com.heibaiying.wordcount.component.SplitBolt;
6 | import org.apache.storm.Config;
7 | import org.apache.storm.LocalCluster;
8 | import org.apache.storm.topology.TopologyBuilder;
9 |
10 | public class LocalWordCountApp {
11 |
12 | public static void main(String[] args) {
13 | TopologyBuilder builder = new TopologyBuilder();
14 | builder.setSpout("DataSourceSpout", new DataSourceSpout());
15 | // Route the data emitted by DataSourceSpout to SplitBolt for processing
16 | builder.setBolt("SplitBolt", new SplitBolt()).shuffleGrouping("DataSourceSpout");
17 | // Route the data emitted by SplitBolt to CountBolt for processing
18 | builder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("SplitBolt");
19 |
20 | // Create a local cluster for testing; this mode needs no local Storm installation, just run this main method
21 | LocalCluster cluster = new LocalCluster();
22 | cluster.submitTopology("LocalWordCountApp",
23 | new Config(), builder.createTopology());
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/code/Flink/flink-state-management/src/main/java/com/heibaiying/keyedstate/KeyedStateJob.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.keyedstate;
2 |
3 | import org.apache.flink.api.java.tuple.Tuple2;
4 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
6 |
7 |
8 | public class KeyedStateJob {
9 |
10 | public static void main(String[] args) throws Exception {
11 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
12 | DataStreamSource<Tuple2<String, Long>> tuple2DataStreamSource = env.fromElements(
13 | Tuple2.of("a", 50L), Tuple2.of("a", 80L), Tuple2.of("a", 400L),
14 | Tuple2.of("a", 100L), Tuple2.of("a", 200L), Tuple2.of("a", 200L),
15 | Tuple2.of("b", 100L), Tuple2.of("b", 200L), Tuple2.of("b", 200L),
16 | Tuple2.of("b", 500L), Tuple2.of("b", 600L), Tuple2.of("b", 700L));
17 | tuple2DataStreamSource
18 | .keyBy(0)
19 | .flatMap(new ThresholdWarning(100L, 3))
20 | .printToErr();
21 | env.execute("Managed Keyed State");
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
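The `ThresholdWarning` function used above is not included in this excerpt. The sketch below is one plausible implementation based on managed keyed `ListState`: it buffers values at or above the threshold per key and emits a warning once a key has accumulated the configured number of them. Field names and the output format are assumptions, not the repository's exact code.

```java
package com.heibaiying.keyedstate;

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.List;

public class ThresholdWarning
        extends RichFlatMapFunction<Tuple2<String, Long>, Tuple2<String, List<Long>>> {

    // values at or above the threshold, kept separately for each key
    private transient ListState<Long> abnormalData;
    private final Long threshold;
    private final Integer numberOfTimes;

    ThresholdWarning(Long threshold, Integer numberOfTimes) {
        this.threshold = threshold;
        this.numberOfTimes = numberOfTimes;
    }

    @Override
    public void open(Configuration parameters) {
        // keyed state: Flink maintains one list per key ("a", "b", ...)
        abnormalData = getRuntimeContext()
                .getListState(new ListStateDescriptor<>("abnormalData", Long.class));
    }

    @Override
    public void flatMap(Tuple2<String, Long> value,
                        Collector<Tuple2<String, List<Long>>> out) throws Exception {
        if (value.f1 >= threshold) {
            abnormalData.add(value.f1);
        }
        List<Long> buffered = new ArrayList<>();
        abnormalData.get().forEach(buffered::add);
        // once the key has exceeded the threshold often enough, emit a warning and reset its state
        if (buffered.size() >= numberOfTimes) {
            out.collect(Tuple2.of(value.f0 + " exceeded the threshold " + numberOfTimes + " times", buffered));
            abnormalData.clear();
        }
    }
}
```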
/code/Phoenix/spring-boot-mybatis-phoenix/src/main/resources/application.yml:
--------------------------------------------------------------------------------
1 | spring:
2 | datasource:
3 | # ZooKeeper address
4 | url: jdbc:phoenix:192.168.0.105:2181
5 | driver-class-name: org.apache.phoenix.jdbc.PhoenixDriver
6 |
7 | # If no special connection-pool tuning is needed, the pool settings below are optional
8 | # Spring Boot 2.x uses the high-performance HikariCP pool by default; for more options see https://github.com/brettwooldridge/HikariCP#configuration-knobs-baby
9 | type: com.zaxxer.hikari.HikariDataSource
10 | hikari:
11 | # Minimum number of idle connections maintained in the pool
12 | minimum-idle: 10
13 | # Maximum pool size, including both idle and in-use connections
14 | maximum-pool-size: 20
15 | # Default auto-commit behaviour of connections returned from the pool; defaults to true
16 | auto-commit: true
17 | # Maximum time a connection may remain idle
18 | idle-timeout: 30000
19 | # User-defined name of the pool, shown mainly in logs and the JMX console to identify the pool and its configuration; default: auto-generated
20 | pool-name: custom-hikari
21 | # Maximum lifetime of a connection in the pool; 0 means unlimited, default 1800000 (30 minutes)
22 | max-lifetime: 1800000
23 | # Database connection timeout; default 30 seconds (30000 ms)
24 | connection-timeout: 30000
25 | # Connection test SQL; adjust it to the database dialect, e.g. for Oracle use select 1 from dual
26 | connection-test-query: SELECT 1
27 |
28 | # MyBatis settings
29 | mybatis:
30 | configuration:
31 | # Whether to log SQL statements; can be enabled for debugging
32 | log-impl: org.apache.ibatis.logging.stdout.StdOutImpl
--------------------------------------------------------------------------------
/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/SplitBolt.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.wordcount.component;
2 |
3 | import org.apache.storm.task.OutputCollector;
4 | import org.apache.storm.task.TopologyContext;
5 | import org.apache.storm.topology.OutputFieldsDeclarer;
6 | import org.apache.storm.topology.base.BaseRichBolt;
7 | import org.apache.storm.tuple.Fields;
8 | import org.apache.storm.tuple.Tuple;
9 | import org.apache.storm.tuple.Values;
10 |
11 | import java.util.Map;
12 |
13 | public class SplitBolt extends BaseRichBolt {
14 |
15 | private OutputCollector collector;
16 |
17 | @Override
18 | public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
19 | this.collector=collector;
20 | }
21 |
22 | @Override
23 | public void execute(Tuple input) {
24 | String line = input.getStringByField("line");
25 | String[] words = line.split("\t");
26 | for (String word : words) {
27 | collector.emit(new Values(word));
28 | }
29 | }
30 |
31 | @Override
32 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
33 | declarer.declare(new Fields("word"));
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/producers/SimpleProducer.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.producers;
2 |
3 | import org.apache.kafka.clients.producer.KafkaProducer;
4 | import org.apache.kafka.clients.producer.Producer;
5 | import org.apache.kafka.clients.producer.ProducerRecord;
6 |
7 | import java.util.Properties;
8 |
9 | /*
10 | * Kafka producer example
11 | */
12 |
13 | public class SimpleProducer {
14 |
15 | public static void main(String[] args) {
16 |
17 | String topicName = "Hello-Kafka";
18 |
19 | Properties props = new Properties();
20 | props.put("bootstrap.servers", "hadoop001:9092");
21 | props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
22 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
23 | /* Create the producer */
24 | Producer<String, String> producer = new KafkaProducer<>(props);
25 |
26 | for (int i = 0; i < 10; i++) {
27 | ProducerRecord<String, String> record = new ProducerRecord<>(topicName, "hello" + i, "world" + i);
28 | /* Send the message */
29 | producer.send(record);
30 | }
31 |
32 | /* Close the producer */
33 | producer.close();
34 | }
35 | }
--------------------------------------------------------------------------------
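For completeness, a minimal consumer sketch that reads back the messages produced above; the group id is illustrative, and `poll(Duration)` assumes a Kafka client of version 2.0 or later.

```java
package com.heibaiying.consumers;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class SimpleConsumer {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "hadoop001:9092");
        props.put("group.id", "simple-consumer-group");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("Hello-Kafka"));
            while (true) {
                // poll blocks for up to one second waiting for new records
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("key = %s, value = %s, partition = %d%n",
                            record.key(), record.value(), record.partition());
                }
            }
        }
    }
}
```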
/code/Storm/storm-kafka-integration/src/main/java/com/heibaiying/kafka/read/LogConsoleBolt.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.kafka.read;
2 |
3 | import org.apache.storm.task.OutputCollector;
4 | import org.apache.storm.task.TopologyContext;
5 | import org.apache.storm.topology.OutputFieldsDeclarer;
6 | import org.apache.storm.topology.base.BaseRichBolt;
7 | import org.apache.storm.tuple.Tuple;
8 |
9 | import java.util.Map;
10 |
11 | /**
12 | * Prints the data received from Kafka
13 | */
14 | public class LogConsoleBolt extends BaseRichBolt {
15 |
16 |
17 | private OutputCollector collector;
18 |
19 | public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
20 | this.collector=collector;
21 | }
22 |
23 | public void execute(Tuple input) {
24 | try {
25 | String value = input.getStringByField("value");
26 | System.out.println("received from kafka : "+ value);
27 | // Must ack the tuple, otherwise the Kafka messages will be re-consumed
28 | collector.ack(input);
29 | }catch (Exception e){
30 | e.printStackTrace();
31 | collector.fail(input);
32 | }
33 |
34 |
35 | }
36 |
37 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
38 |
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/code/Storm/storm-hbase-integration/src/main/java/com/heibaiying/component/SplitBolt.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.storm.task.OutputCollector;
4 | import org.apache.storm.task.TopologyContext;
5 | import org.apache.storm.topology.OutputFieldsDeclarer;
6 | import org.apache.storm.topology.base.BaseRichBolt;
7 | import org.apache.storm.tuple.Fields;
8 | import org.apache.storm.tuple.Tuple;
9 |
10 | import java.util.Map;
11 |
12 | import static org.apache.storm.utils.Utils.tuple;
13 |
14 | /**
15 | * Splits each line of input on the given delimiter
16 | */
17 | public class SplitBolt extends BaseRichBolt {
18 |
19 | private OutputCollector collector;
20 |
21 | @Override
22 | public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
23 | this.collector = collector;
24 | }
25 |
26 | @Override
27 | public void execute(Tuple input) {
28 | String line = input.getStringByField("line");
29 | String[] words = line.split("\t");
30 | for (String word : words) {
31 | collector.emit(tuple(word, 1));
32 | }
33 | }
34 |
35 | @Override
36 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
37 | declarer.declare(new Fields("word", "count"));
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/code/Storm/storm-redis-integration/src/main/java/com/heibaiying/component/SplitBolt.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.storm.task.OutputCollector;
4 | import org.apache.storm.task.TopologyContext;
5 | import org.apache.storm.topology.OutputFieldsDeclarer;
6 | import org.apache.storm.topology.base.BaseRichBolt;
7 | import org.apache.storm.tuple.Fields;
8 | import org.apache.storm.tuple.Tuple;
9 | import org.apache.storm.tuple.Values;
10 |
11 | import java.util.Map;
12 |
13 | /**
14 | * Splits each line of input on the given delimiter
15 | */
16 | public class SplitBolt extends BaseRichBolt {
17 |
18 | private OutputCollector collector;
19 |
20 | @Override
21 | public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
22 | this.collector = collector;
23 | }
24 |
25 | @Override
26 | public void execute(Tuple input) {
27 | String line = input.getStringByField("line");
28 | String[] words = line.split("\t");
29 | for (String word : words) {
30 | collector.emit(new Values(word, String.valueOf(1)));
31 | }
32 | }
33 |
34 | @Override
35 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
36 | declarer.declare(new Fields("word", "count"));
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/code/Flink/flink-state-management/src/main/java/com/heibaiying/operatorstate/OperatorStateJob.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.operatorstate;
2 |
3 | import org.apache.flink.api.java.tuple.Tuple2;
4 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
6 |
7 |
8 | public class OperatorStateJob {
9 |
10 | public static void main(String[] args) throws Exception {
11 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
12 | // Enable the checkpointing mechanism
13 | env.enableCheckpointing(1000);
14 | // Set the parallelism to 1
15 | DataStreamSource<Tuple2<String, Long>> tuple2DataStreamSource = env.setParallelism(1).fromElements(
16 | Tuple2.of("a", 50L), Tuple2.of("a", 80L), Tuple2.of("a", 400L),
17 | Tuple2.of("a", 100L), Tuple2.of("a", 200L), Tuple2.of("a", 200L),
18 | Tuple2.of("b", 100L), Tuple2.of("b", 200L), Tuple2.of("b", 200L),
19 | Tuple2.of("b", 500L), Tuple2.of("b", 600L), Tuple2.of("b", 700L));
20 | tuple2DataStreamSource
21 | .flatMap(new ThresholdWarning(100L, 3))
22 | .printToErr();
23 | env.execute("Managed Keyed State");
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/code/Flink/flink-basis-java/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/code/Flink/flink-basis-scala/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/code/Flink/flink-kafka-integration/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/code/Flink/flink-state-management/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/CountBolt.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.wordcount.component;
2 |
3 | import org.apache.storm.task.OutputCollector;
4 | import org.apache.storm.task.TopologyContext;
5 | import org.apache.storm.topology.OutputFieldsDeclarer;
6 | import org.apache.storm.topology.base.BaseRichBolt;
7 | import org.apache.storm.tuple.Tuple;
8 |
9 | import java.util.HashMap;
10 | import java.util.Map;
11 |
12 | public class CountBolt extends BaseRichBolt {
13 |
14 | private Map<String, Integer> counts = new HashMap<>();
15 |
16 | @Override
17 | public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
18 |
19 | }
20 |
21 | @Override
22 | public void execute(Tuple input) {
23 | String word = input.getStringByField("word");
24 | Integer count = counts.get(word);
25 | if (count == null) {
26 | count = 0;
27 | }
28 | count++;
29 | counts.put(word, count);
30 | // Print the running counts
31 | System.out.print("Real-time analysis results : ");
32 | counts.forEach((key, value) -> System.out.print(key + ":" + value + "; "));
33 | System.out.println();
34 | }
35 |
36 | @Override
37 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
38 |
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/notes/installation/Linux下Flume的安装.md:
--------------------------------------------------------------------------------
1 | # Installing Flume on Linux
2 |
3 |
4 | ## 1. Prerequisites
5 |
6 | Flume requires JDK 1.8+. For JDK installation instructions see this repository:
7 |
8 | > [Installing JDK on Linux](https://github.com/heibaiying/BigData-Notes/blob/master/notes/installation/Linux下JDK安装.md)
9 |
10 |
11 |
12 | ## 2. Installation Steps
13 |
14 | ### 2.1 Download and extract
15 |
16 | Download the required version of Flume; the `CDH` build is used here. Download address: http://archive.cloudera.com/cdh5/cdh/5/
17 |
18 | ```shell
19 | # Extract after downloading
20 | tar -zxvf flume-ng-1.6.0-cdh5.15.2.tar.gz
21 | ```
22 |
23 | ### 2.2 Configure environment variables
24 |
25 | ```shell
26 | # vim /etc/profile
27 | ```
28 |
29 | Add the environment variables:
30 |
31 | ```shell
32 | export FLUME_HOME=/usr/app/apache-flume-1.6.0-cdh5.15.2-bin
33 | export PATH=$FLUME_HOME/bin:$PATH
34 | ```
35 |
36 | Make the new environment variables take effect immediately:
37 |
38 | ```shell
39 | # source /etc/profile
40 | ```
41 |
42 | ### 2.3 Modify the configuration
43 |
44 | Go to the `conf/` directory under the installation directory and copy the Flume environment configuration template `flume-env.sh.template`:
45 |
46 | ```shell
47 | # cp flume-env.sh.template flume-env.sh
48 | ```
49 |
50 | Edit `flume-env.sh` and set the JDK installation path:
51 |
52 | ```shell
53 | # Enviroment variables can be set here.
54 | export JAVA_HOME=/usr/java/jdk1.8.0_201
55 | ```
56 |
57 | ### 2.4 Verification
58 |
59 | Since Flume's bin directory has already been added to the PATH, the installation can be verified directly with the following command:
60 |
61 | ```shell
62 | # flume-ng version
63 | ```
64 |
65 | If the corresponding version information is printed, the configuration succeeded.
66 |
67 | 
68 |
69 |
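70 | ### 2.5 Quick smoke test (supplementary)
71 |
72 | Beyond `flume-ng version`, the installation can also be exercised end to end with a minimal agent. The sketch below is not part of the original guide; the agent name `a1` and the file name `example.conf` are arbitrary placeholders. It wires a netcat source to a logger sink through a memory channel:
73 |
74 | ```properties
75 | # example.conf: single-node agent, netcat source -> memory channel -> logger sink
76 | a1.sources = r1
77 | a1.channels = c1
78 | a1.sinks = k1
79 |
80 | a1.sources.r1.type = netcat
81 | a1.sources.r1.bind = localhost
82 | a1.sources.r1.port = 44444
83 |
84 | a1.channels.c1.type = memory
85 |
86 | a1.sinks.k1.type = logger
87 |
88 | a1.sources.r1.channels = c1
89 | a1.sinks.k1.channel = c1
90 | ```
91 |
92 | ```shell
93 | # Start the agent, then send it a test line with nc/telnet from another terminal
94 | flume-ng agent --conf $FLUME_HOME/conf --conf-file example.conf --name a1 -Dflume.root.logger=INFO,console
95 | ```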
--------------------------------------------------------------------------------
/code/Hbase/hbase-java-api-2.x/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>hbase-java-api-2.x</artifactId>
9 |     <version>1.0-SNAPSHOT</version>
10 |
11 |     <build>
12 |         <plugins>
13 |             <plugin>
14 |                 <groupId>org.apache.maven.plugins</groupId>
15 |                 <artifactId>maven-compiler-plugin</artifactId>
16 |                 <configuration>
17 |                     <source>8</source>
18 |                     <target>8</target>
19 |                 </configuration>
20 |             </plugin>
21 |         </plugins>
22 |     </build>
23 |
24 |     <dependencies>
25 |         <dependency>
26 |             <groupId>org.apache.hbase</groupId>
27 |             <artifactId>hbase-client</artifactId>
28 |             <version>2.1.4</version>
29 |         </dependency>
30 |         <dependency>
31 |             <groupId>junit</groupId>
32 |             <artifactId>junit</artifactId>
33 |             <version>4.12</version>
34 |             <scope>test</scope>
35 |         </dependency>
36 |     </dependencies>
37 | </project>
--------------------------------------------------------------------------------
/code/Hbase/hbase-java-api-1.x/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>hbase-java-api-1.x</artifactId>
9 |     <version>1.0-SNAPSHOT</version>
10 |
11 |     <build>
12 |         <plugins>
13 |             <plugin>
14 |                 <groupId>org.apache.maven.plugins</groupId>
15 |                 <artifactId>maven-compiler-plugin</artifactId>
16 |                 <configuration>
17 |                     <source>8</source>
18 |                     <target>8</target>
19 |                 </configuration>
20 |             </plugin>
21 |         </plugins>
22 |     </build>
23 |
24 |     <dependencies>
25 |         <dependency>
26 |             <groupId>org.apache.hbase</groupId>
27 |             <artifactId>hbase-client</artifactId>
28 |             <version>1.2.0</version>
29 |         </dependency>
30 |         <dependency>
31 |             <groupId>junit</groupId>
32 |             <artifactId>junit</artifactId>
33 |             <version>4.12</version>
34 |             <scope>test</scope>
35 |         </dependency>
36 |     </dependencies>
37 | </project>
--------------------------------------------------------------------------------
/code/Hadoop/hdfs-java-api/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>hdfs-java-api</artifactId>
9 |     <version>1.0</version>
10 |
11 |     <properties>
12 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
13 |         <hadoop.version>2.6.0-cdh5.15.2</hadoop.version>
14 |     </properties>
15 |
16 |     <repositories>
17 |         <repository>
18 |             <id>cloudera</id>
19 |             <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
20 |         </repository>
21 |     </repositories>
22 |
23 |     <dependencies>
24 |         <dependency>
25 |             <groupId>org.apache.hadoop</groupId>
26 |             <artifactId>hadoop-client</artifactId>
27 |             <version>${hadoop.version}</version>
28 |         </dependency>
29 |         <dependency>
30 |             <groupId>junit</groupId>
31 |             <artifactId>junit</artifactId>
32 |             <version>4.12</version>
33 |             <scope>test</scope>
34 |         </dependency>
35 |     </dependencies>
36 | </project>
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/producers/ProducerWithPartitioner.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.producers;
2 |
3 | import org.apache.kafka.clients.producer.*;
4 |
5 | import java.util.Properties;
6 |
7 | /*
8 | * Kafka producer example: sending messages with a custom partitioner
9 | */
10 | public class ProducerWithPartitioner {
11 |
12 | public static void main(String[] args) {
13 |
14 | String topicName = "Kafka-Partitioner-Test";
15 |
16 | Properties props = new Properties();
17 | props.put("bootstrap.servers", "hadoop001:9092");
18 | props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
19 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
20 |
21 | /* Pass in the custom partitioner */
22 | props.put("partitioner.class", "com.heibaiying.producers.partitioners.CustomPartitioner");
23 | /* Pass the parameter required by the partitioner */
24 | props.put("pass.line", 6);
25 |
26 | Producer<Integer, String> producer = new KafkaProducer<>(props);
27 |
28 | for (int i = 0; i <= 10; i++) {
29 | String score = "score:" + i;
30 | ProducerRecord<Integer, String> record = new ProducerRecord<>(topicName, i, score);
31 | /* Send the message asynchronously */
32 | producer.send(record, (metadata, exception) ->
33 | System.out.printf("%s, partition=%d, \n", score, metadata.partition()));
34 | }
35 |
36 | producer.close();
37 | }
38 | }
--------------------------------------------------------------------------------
/notes/installation/Linux下Python安装.md:
--------------------------------------------------------------------------------
1 | ## Installing Python on Linux
2 |
3 | >**System environment**: CentOS 7.6
4 | >
5 | >**Python version**: Python-3.6.8
6 |
7 | ### 1. Dependencies
8 |
9 | Building Python 3.x depends on the following four packages: gcc, zlib, zlib-devel and openssl-devel, so install them first:
10 |
11 | ```shell
12 | yum install gcc -y
13 | yum install zlib -y
14 | yum install zlib-devel -y
15 | yum install openssl-devel -y
16 | ```
17 |
18 | ### 2. Download the source
19 |
20 | Python source packages can be downloaded from: https://www.python.org/downloads/
21 |
22 | ```shell
23 | # wget https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tgz
24 | ```
25 |
26 | ### 3. Extract and build
27 |
28 | ```shell
29 | # tar -zxvf Python-3.6.8.tgz
30 | ```
31 |
32 | Build from the source root directory. The installation path can be specified at build time; here it is set to `/usr/app/python3.6`:
33 |
34 | ```shell
35 | # cd Python-3.6.8
36 | # ./configure --prefix=/usr/app/python3.6
37 | # make && make install
38 | ```
39 |
40 | ### 4. Configure environment variables
41 |
42 | ```shell
43 | vim /etc/profile
44 | ```
45 |
46 | ```shell
47 | export PYTHON_HOME=/usr/app/python3.6
48 | export PATH=${PYTHON_HOME}/bin:$PATH
49 | ```
50 |
51 | Make the new environment variables take effect immediately:
52 |
53 | ```shell
54 | source /etc/profile
55 | ```
56 |
57 | ### 5. Verify the installation
58 |
59 | Run the `python3` command; if it enters the Python interactive shell, the installation succeeded:
60 |
61 | ```shell
62 | [root@hadoop001 app]# python3
63 | Python 3.6.8 (default, Mar 29 2019, 10:17:41)
64 | [GCC 4.8.5 20150623 (Red Hat 4.8.5-36)] on linux
65 | Type "help", "copyright", "credits" or "license" for more information.
66 | >>> 1+1
67 | 2
68 | >>> exit()
69 | [root@hadoop001 app]#
70 | ```
71 |
72 |
--------------------------------------------------------------------------------
/code/spark/spark-streaming-basis/src/main/java/com/heibaiying/NetworkWordCountToRedis.scala:
--------------------------------------------------------------------------------
1 | package com.heibaiying
2 |
3 | import com.heibaiying.utils.JedisPoolUtil
4 | import org.apache.spark.SparkConf
5 | import org.apache.spark.streaming.dstream.DStream
6 | import org.apache.spark.streaming.{Seconds, StreamingContext}
7 | import redis.clients.jedis.Jedis
8 |
9 | /**
10 | * Word count (results written to Redis)
11 | */
12 | object NetworkWordCountToRedis {
13 |
14 |
15 | def main(args: Array[String]) {
16 |
17 | /* Set the batch interval to 5 seconds */
18 | val sparkConf = new SparkConf().setAppName("NetworkWordCountToRedis").setMaster("local[2]")
19 | val ssc = new StreamingContext(sparkConf, Seconds(5))
20 |
21 | /* Create a text input stream and count the words */
22 | val lines = ssc.socketTextStream("hadoop001", 9999)
23 | val pairs: DStream[(String, Int)] = lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _)
24 |
25 | pairs.foreachRDD { rdd =>
26 | rdd.foreachPartition { partitionOfRecords =>
27 | var jedis: Jedis = null
28 | try {
29 | jedis = JedisPoolUtil.getConnection
30 | partitionOfRecords.foreach(record => jedis.hincrBy("wordCount", record._1, record._2))
31 | } catch {
32 | case ex: Exception =>
33 | ex.printStackTrace()
34 | } finally {
35 | if (jedis != null) jedis.close()
36 | }
37 | }
38 | }
39 |
40 | /* Start the streaming context */
41 | ssc.start()
42 | /* Wait for the computation to terminate */
43 | ssc.awaitTermination()
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/code/spark/spark-streaming-basis/src/main/java/com/heibaiying/NetworkWordCountV2.scala:
--------------------------------------------------------------------------------
1 | package com.heibaiying
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.streaming.{Seconds, StreamingContext}
5 |
6 | /**
7 | * Word count, version 2 (stateful, with checkpointing)
8 | */
9 | object NetworkWordCountV2 {
10 |
11 |
12 | def main(args: Array[String]) {
13 |
14 | /*
15 | * When testing locally it is best to set the Hadoop user name explicitly; otherwise the
16 | * local machine's user name is used by default, which may cause a permission exception when creating directories on HDFS
17 | */
18 | System.setProperty("HADOOP_USER_NAME", "root")
19 |
20 | /* Set the batch interval to 5 seconds */
21 | val sparkConf = new SparkConf().setAppName("NetworkWordCountV2").setMaster("local[2]")
22 | val ssc = new StreamingContext(sparkConf, Seconds(5))
23 |
24 | /* A checkpoint directory must be set */
25 | ssc.checkpoint("hdfs://hadoop001:8020/spark-streaming")
26 |
27 | /* Create a text input stream and count the words */
28 | val lines = ssc.socketTextStream("hadoop001", 9999)
29 | lines.flatMap(_.split(" ")).map(x => (x, 1))
30 | .updateStateByKey[Int](updateFunction _)
31 | .print()
32 |
33 | /* Start the streaming context */
34 | ssc.start()
35 | /* Wait for the computation to terminate */
36 | ssc.awaitTermination()
37 |
38 | }
39 |
40 | /**
41 | * Running sum
42 | *
43 | * @param currentValues values from the current batch
44 | * @param preValues previously accumulated value
45 | * @return the updated sum
46 | */
47 | def updateFunction(currentValues: Seq[Int], preValues: Option[Int]): Option[Int] = {
48 | val current = currentValues.sum
49 | val pre = preValues.getOrElse(0)
50 | Some(current + pre)
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/code/Storm/storm-hbase-integration/src/main/java/com/heibaiying/component/CountBolt.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.storm.task.OutputCollector;
4 | import org.apache.storm.task.TopologyContext;
5 | import org.apache.storm.topology.OutputFieldsDeclarer;
6 | import org.apache.storm.topology.base.BaseRichBolt;
7 | import org.apache.storm.tuple.Fields;
8 | import org.apache.storm.tuple.Tuple;
9 | import org.apache.storm.tuple.Values;
10 |
11 | import java.util.HashMap;
12 | import java.util.Map;
13 |
14 | /**
15 | * Counts word frequencies
16 | */
17 | public class CountBolt extends BaseRichBolt {
18 |
19 | private Map<String, Integer> counts = new HashMap<>();
20 |
21 | private OutputCollector collector;
22 |
23 |
24 | @Override
25 | public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
26 | this.collector=collector;
27 | }
28 |
29 | @Override
30 | public void execute(Tuple input) {
31 | String word = input.getStringByField("word");
32 | Integer count = counts.get(word);
33 | if (count == null) {
34 | count = 0;
35 | }
36 | count++;
37 | counts.put(word, count);
38 | // Emit the word together with its current count
39 | collector.emit(new Values(word, String.valueOf(count)));
40 |
41 | }
42 |
43 | @Override
44 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
45 | declarer.declare(new Fields("word", "count"));
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/code/Storm/storm-redis-integration/src/main/java/com/heibaiying/component/CountBolt.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.storm.task.OutputCollector;
4 | import org.apache.storm.task.TopologyContext;
5 | import org.apache.storm.topology.OutputFieldsDeclarer;
6 | import org.apache.storm.topology.base.BaseRichBolt;
7 | import org.apache.storm.tuple.Fields;
8 | import org.apache.storm.tuple.Tuple;
9 | import org.apache.storm.tuple.Values;
10 |
11 | import java.util.HashMap;
12 | import java.util.Map;
13 |
14 | /**
15 | * Counts word frequencies
16 | */
17 | public class CountBolt extends BaseRichBolt {
18 |
19 | private Map<String, Integer> counts = new HashMap<>();
20 |
21 | private OutputCollector collector;
22 |
23 |
24 | @Override
25 | public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
26 | this.collector=collector;
27 | }
28 |
29 | @Override
30 | public void execute(Tuple input) {
31 | String word = input.getStringByField("word");
32 | Integer count = counts.get(word);
33 | if (count == null) {
34 | count = 0;
35 | }
36 | count++;
37 | counts.put(word, count);
38 | // Emit the word together with its current count
39 | collector.emit(new Values(word, String.valueOf(count)));
40 |
41 | }
42 |
43 | @Override
44 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
45 | declarer.declare(new Fields("word", "count"));
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/ClusterWordCountApp.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.wordcount;
2 |
3 | import com.heibaiying.wordcount.component.CountBolt;
4 | import com.heibaiying.wordcount.component.DataSourceSpout;
5 | import com.heibaiying.wordcount.component.SplitBolt;
6 | import org.apache.storm.Config;
7 | import org.apache.storm.LocalCluster;
8 | import org.apache.storm.StormSubmitter;
9 | import org.apache.storm.generated.AlreadyAliveException;
10 | import org.apache.storm.generated.AuthorizationException;
11 | import org.apache.storm.generated.InvalidTopologyException;
12 | import org.apache.storm.topology.TopologyBuilder;
13 |
14 | public class ClusterWordCountApp {
15 |
16 | public static void main(String[] args) {
17 | TopologyBuilder builder = new TopologyBuilder();
18 | builder.setSpout("DataSourceSpout", new DataSourceSpout());
19 | // Route the output of DataSourceSpout to SplitBolt for processing
20 | builder.setBolt("SplitBolt", new SplitBolt()).shuffleGrouping("DataSourceSpout");
21 | // Route the output of SplitBolt to CountBolt for processing
22 | builder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("SplitBolt");
23 |
24 | // Submit the topology to the cluster with StormSubmitter
25 | try {
26 | StormSubmitter.submitTopology("ClusterWordCountApp", new Config(), builder.createTopology());
27 | } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
28 | e.printStackTrace();
29 | }
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>kafka-basis</artifactId>
9 |     <version>1.0</version>
10 |
11 |     <build>
12 |         <plugins>
13 |             <plugin>
14 |                 <groupId>org.apache.maven.plugins</groupId>
15 |                 <artifactId>maven-compiler-plugin</artifactId>
16 |                 <configuration>
17 |                     <source>8</source>
18 |                     <target>8</target>
19 |                 </configuration>
20 |             </plugin>
21 |         </plugins>
22 |     </build>
23 |
24 |     <dependencies>
25 |         <dependency>
26 |             <groupId>org.apache.kafka</groupId>
27 |             <artifactId>kafka-clients</artifactId>
28 |             <version>2.2.0</version>
29 |         </dependency>
30 |         <dependency>
31 |             <groupId>org.apache.kafka</groupId>
32 |             <artifactId>kafka_2.12</artifactId>
33 |             <version>2.2.0</version>
34 |         </dependency>
35 |         <dependency>
36 |             <groupId>org.slf4j</groupId>
37 |             <artifactId>slf4j-nop</artifactId>
38 |             <version>1.7.25</version>
39 |         </dependency>
40 |     </dependencies>
41 | </project>
--------------------------------------------------------------------------------
/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/sink/FlinkToMySQLSink.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.sink;
2 |
3 | import com.heibaiying.bean.Employee;
4 | import org.apache.flink.configuration.Configuration;
5 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
6 |
7 | import java.sql.Connection;
8 | import java.sql.DriverManager;
9 | import java.sql.PreparedStatement;
10 |
11 | public class FlinkToMySQLSink extends RichSinkFunction<Employee> {
12 |
13 | private PreparedStatement stmt;
14 | private Connection conn;
15 |
16 | @Override
17 | public void open(Configuration parameters) throws Exception {
18 | Class.forName("com.mysql.cj.jdbc.Driver");
19 | conn = DriverManager.getConnection("jdbc:mysql://192.168.0.229:3306/employees?characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false", "root", "123456");
20 | String sql = "insert into emp(name, age, birthday) values(?, ?, ?)";
21 | stmt = conn.prepareStatement(sql);
22 | }
23 |
24 | @Override
25 | public void invoke(Employee value, Context context) throws Exception {
26 | stmt.setString(1, value.getName());
27 | stmt.setInt(2, value.getAge());
28 | stmt.setDate(3, value.getBirthday());
29 | stmt.executeUpdate();
30 | }
31 |
32 | @Override
33 | public void close() throws Exception {
34 | super.close();
35 | if (stmt != null) {
36 | stmt.close();
37 | }
38 | if (conn != null) {
39 | conn.close();
40 | }
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/code/spark/spark-streaming-kafka/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>spark-streaming-kafka</artifactId>
9 |     <version>1.0</version>
10 |
11 |     <properties>
12 |         <scala.version>2.12</scala.version>
13 |         <spark.version>2.4.0</spark.version>
14 |     </properties>
15 |
16 |     <dependencies>
17 |         <dependency>
18 |             <groupId>org.apache.spark</groupId>
19 |             <artifactId>spark-streaming_${scala.version}</artifactId>
20 |             <version>${spark.version}</version>
21 |         </dependency>
22 |         <dependency>
23 |             <groupId>org.apache.spark</groupId>
24 |             <artifactId>spark-streaming-kafka-0-10_${scala.version}</artifactId>
25 |             <version>2.4.3</version>
26 |         </dependency>
27 |         <dependency>
28 |             <groupId>com.thoughtworks.paranamer</groupId>
29 |             <artifactId>paranamer</artifactId>
30 |             <version>2.8</version>
31 |         </dependency>
32 |     </dependencies>
33 | </project>
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/producers/ProducerASyn.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.producers;
2 |
3 | import org.apache.kafka.clients.producer.*;
4 |
5 | import java.util.Properties;
6 |
7 | /*
8 | * Kafka producer example: sending messages asynchronously
9 | */
10 | public class ProducerASyn {
11 |
12 | public static void main(String[] args) {
13 |
14 | String topicName = "Hello-Kafka";
15 |
16 | Properties props = new Properties();
17 | props.put("bootstrap.servers", "hadoop001:9092");
18 | props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
19 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
20 | /* Create the producer */
21 | Producer<String, String> producer = new KafkaProducer<>(props);
22 |
23 | for (int i = 0; i < 10; i++) {
24 | ProducerRecord<String, String> record = new ProducerRecord<>(topicName, "k" + i, "world" + i);
25 | /* Send the message asynchronously and register a callback */
26 | producer.send(record, new Callback() {
27 | @Override
28 | public void onCompletion(RecordMetadata metadata, Exception exception) {
29 | if (exception != null) {
30 | System.out.println("Handle the exception here");
31 | } else {
32 | System.out.printf("topic=%s, partition=%d, offset=%s \n",
33 | metadata.topic(), metadata.partition(), metadata.offset());
34 | }
35 | }
36 | });
37 | }
38 |
39 | /* Close the producer */
40 | producer.close();
41 | }
42 | }
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/producers/ProducerSyn.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.producers;
2 |
3 | import org.apache.kafka.clients.producer.KafkaProducer;
4 | import org.apache.kafka.clients.producer.Producer;
5 | import org.apache.kafka.clients.producer.ProducerRecord;
6 | import org.apache.kafka.clients.producer.RecordMetadata;
7 |
8 | import java.util.Properties;
9 | import java.util.concurrent.ExecutionException;
10 |
11 | /*
12 | * Kafka producer example: sending messages synchronously
13 | */
14 | public class ProducerSyn {
15 |
16 | public static void main(String[] args) {
17 |
18 | String topicName = "Hello-Kafka";
19 |
20 | Properties props = new Properties();
21 | props.put("bootstrap.servers", "hadoop001:9092");
22 | props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
23 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
24 | /* Create the producer */
25 | Producer<String, String> producer = new KafkaProducer<>(props);
26 |
27 | for (int i = 0; i < 10; i++) {
28 | try {
29 | ProducerRecord<String, String> record = new ProducerRecord<>(topicName, "k" + i, "world" + i);
30 | /* Send the message synchronously */
31 | RecordMetadata metadata = producer.send(record).get();
32 | System.out.printf("topic=%s, partition=%d, offset=%s \n",
33 | metadata.topic(), metadata.partition(), metadata.offset());
34 | } catch (InterruptedException | ExecutionException e) {
35 | e.printStackTrace();
36 | }
37 | }
38 |
39 | /* Close the producer */
40 | producer.close();
41 | }
42 | }
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/consumers/ConsumerSyn.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.consumers;
2 |
3 | import org.apache.kafka.clients.consumer.ConsumerRecord;
4 | import org.apache.kafka.clients.consumer.ConsumerRecords;
5 | import org.apache.kafka.clients.consumer.KafkaConsumer;
6 |
7 | import java.time.Duration;
8 | import java.time.temporal.ChronoUnit;
9 | import java.util.Collections;
10 | import java.util.Properties;
11 |
12 | /**
13 | * Kafka consumer: synchronous offset commits
14 | */
15 | public class ConsumerSyn {
16 |
17 | public static void main(String[] args) {
18 | String topic = "Hello-Kafka";
19 | String group = "group1";
20 | Properties props = new Properties();
21 | props.put("bootstrap.servers", "hadoop001:9092");
22 | props.put("group.id", group);
23 | props.put("enable.auto.commit", false);
24 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
25 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
26 | KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
27 |
28 | consumer.subscribe(Collections.singletonList(topic));
29 |
30 | try {
31 | while (true) {
32 | ConsumerRecords<String, String> records = consumer.poll(Duration.of(100, ChronoUnit.MILLIS));
33 | for (ConsumerRecord<String, String> record : records) {
34 | System.out.println(record);
35 | }
36 | /* Commit offsets synchronously */
37 | consumer.commitSync();
38 | }
39 | } finally {
40 | consumer.close();
41 | }
42 |
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/DataSourceSpout.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.wordcount.component;
2 |
3 | import org.apache.commons.lang3.StringUtils;
4 | import org.apache.storm.spout.SpoutOutputCollector;
5 | import org.apache.storm.task.TopologyContext;
6 | import org.apache.storm.topology.OutputFieldsDeclarer;
7 | import org.apache.storm.topology.base.BaseRichSpout;
8 | import org.apache.storm.tuple.Fields;
9 | import org.apache.storm.tuple.Values;
10 | import org.apache.storm.utils.Utils;
11 |
12 | import java.util.*;
13 |
14 | public class DataSourceSpout extends BaseRichSpout {
15 |
16 | private List<String> list = Arrays.asList("Spark", "Hadoop", "HBase", "Storm", "Flink", "Hive");
17 |
18 | private SpoutOutputCollector spoutOutputCollector;
19 |
20 | @Override
21 | public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
22 | this.spoutOutputCollector = spoutOutputCollector;
23 | }
24 |
25 | @Override
26 | public void nextTuple() {
27 | // Produce simulated data
28 | String lineData = productData();
29 | spoutOutputCollector.emit(new Values(lineData));
30 | Utils.sleep(1000);
31 | }
32 |
33 | @Override
34 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
35 | outputFieldsDeclarer.declare(new Fields("line"));
36 | }
37 |
38 |
39 | /**
40 | * Generates a line of simulated data
41 | */
42 | private String productData() {
43 | Collections.shuffle(list);
44 | Random random = new Random();
45 | int endIndex = random.nextInt(list.size()) % (list.size()) + 1;
46 | return StringUtils.join(list.toArray(), "\t", 0, endIndex);
47 | }
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/code/Zookeeper/curator/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>curator</artifactId>
9 |     <version>1.0</version>
10 |
11 |     <build>
12 |         <plugins>
13 |             <plugin>
14 |                 <groupId>org.apache.maven.plugins</groupId>
15 |                 <artifactId>maven-compiler-plugin</artifactId>
16 |                 <configuration>
17 |                     <source>8</source>
18 |                     <target>8</target>
19 |                 </configuration>
20 |             </plugin>
21 |         </plugins>
22 |     </build>
23 |
24 |     <dependencies>
25 |         <dependency>
26 |             <groupId>org.apache.curator</groupId>
27 |             <artifactId>curator-framework</artifactId>
28 |             <version>4.0.0</version>
29 |         </dependency>
30 |         <dependency>
31 |             <groupId>org.apache.curator</groupId>
32 |             <artifactId>curator-recipes</artifactId>
33 |             <version>4.0.0</version>
34 |         </dependency>
35 |         <dependency>
36 |             <groupId>org.apache.zookeeper</groupId>
37 |             <artifactId>zookeeper</artifactId>
38 |             <version>3.4.13</version>
39 |         </dependency>
40 |         <dependency>
41 |             <groupId>junit</groupId>
42 |             <artifactId>junit</artifactId>
43 |             <version>4.12</version>
44 |         </dependency>
45 |     </dependencies>
46 | </project>
--------------------------------------------------------------------------------
/code/Storm/storm-hbase-integration/src/main/java/com/heibaiying/component/DataSourceSpout.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
4 | import org.apache.storm.spout.SpoutOutputCollector;
5 | import org.apache.storm.task.TopologyContext;
6 | import org.apache.storm.topology.OutputFieldsDeclarer;
7 | import org.apache.storm.topology.base.BaseRichSpout;
8 | import org.apache.storm.tuple.Fields;
9 | import org.apache.storm.tuple.Values;
10 | import org.apache.storm.utils.Utils;
11 |
12 | import java.util.*;
13 |
14 |
15 | /**
16 | * Spout that produces sample data for the word count
17 | */
18 | public class DataSourceSpout extends BaseRichSpout {
19 |
20 | private List<String> list = Arrays.asList("Spark", "Hadoop", "HBase", "Storm", "Flink", "Hive");
21 |
22 | private SpoutOutputCollector spoutOutputCollector;
23 |
24 | @Override
25 | public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
26 | this.spoutOutputCollector = spoutOutputCollector;
27 | }
28 |
29 | @Override
30 | public void nextTuple() {
31 | // Produce simulated data
32 | String lineData = productData();
33 | spoutOutputCollector.emit(new Values(lineData));
34 | Utils.sleep(1000);
35 | }
36 |
37 | @Override
38 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
39 | outputFieldsDeclarer.declare(new Fields("line"));
40 | }
41 |
42 |
43 | /**
44 | * Generates a line of simulated data
45 | */
46 | private String productData() {
47 | Collections.shuffle(list);
48 | Random random = new Random();
49 | int endIndex = random.nextInt(list.size()) % (list.size()) + 1;
50 | return StringUtils.join(list.toArray(), "\t", 0, endIndex);
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/code/Storm/storm-hdfs-integration/src/main/java/com.heibaiying/component/DataSourceSpout.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
4 | import org.apache.storm.spout.SpoutOutputCollector;
5 | import org.apache.storm.task.TopologyContext;
6 | import org.apache.storm.topology.OutputFieldsDeclarer;
7 | import org.apache.storm.topology.base.BaseRichSpout;
8 | import org.apache.storm.tuple.Fields;
9 | import org.apache.storm.tuple.Values;
10 | import org.apache.storm.utils.Utils;
11 |
12 | import java.util.*;
13 |
14 |
15 | /**
16 | * Spout that produces sample data for the word count
17 | */
18 | public class DataSourceSpout extends BaseRichSpout {
19 |
20 | private List<String> list = Arrays.asList("Spark", "Hadoop", "HBase", "Storm", "Flink", "Hive");
21 |
22 | private SpoutOutputCollector spoutOutputCollector;
23 |
24 | @Override
25 | public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
26 | this.spoutOutputCollector = spoutOutputCollector;
27 | }
28 |
29 | @Override
30 | public void nextTuple() {
31 | // Produce simulated data
32 | String lineData = productData();
33 | spoutOutputCollector.emit(new Values(lineData));
34 | Utils.sleep(1000);
35 | }
36 |
37 | @Override
38 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
39 | outputFieldsDeclarer.declare(new Fields("line"));
40 | }
41 |
42 |
43 | /**
44 | * Generates a line of simulated data
45 | */
46 | private String productData() {
47 | Collections.shuffle(list);
48 | Random random = new Random();
49 | int endIndex = random.nextInt(list.size()) % (list.size()) + 1;
50 | return StringUtils.join(list.toArray(), "\t", 0, endIndex);
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/code/Storm/storm-redis-integration/src/main/java/com/heibaiying/component/DataSourceSpout.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.component;
2 |
3 | import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
4 | import org.apache.storm.spout.SpoutOutputCollector;
5 | import org.apache.storm.task.TopologyContext;
6 | import org.apache.storm.topology.OutputFieldsDeclarer;
7 | import org.apache.storm.topology.base.BaseRichSpout;
8 | import org.apache.storm.tuple.Fields;
9 | import org.apache.storm.tuple.Values;
10 | import org.apache.storm.utils.Utils;
11 |
12 | import java.util.*;
13 |
14 |
15 | /**
16 | * Spout that produces sample data for the word count
17 | */
18 | public class DataSourceSpout extends BaseRichSpout {
19 |
20 | private List<String> list = Arrays.asList("Spark", "Hadoop", "HBase", "Storm", "Flink", "Hive");
21 |
22 | private SpoutOutputCollector spoutOutputCollector;
23 |
24 | @Override
25 | public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
26 | this.spoutOutputCollector = spoutOutputCollector;
27 | }
28 |
29 | @Override
30 | public void nextTuple() {
31 | // Produce simulated data
32 | String lineData = productData();
33 | spoutOutputCollector.emit(new Values(lineData));
34 | Utils.sleep(1000);
35 | }
36 |
37 | @Override
38 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
39 | outputFieldsDeclarer.declare(new Fields("line"));
40 | }
41 |
42 |
43 | /**
44 | * Generates a line of simulated data
45 | */
46 | private String productData() {
47 | Collections.shuffle(list);
48 | Random random = new Random();
49 | int endIndex = random.nextInt(list.size()) % (list.size()) + 1;
50 | return StringUtils.join(list.toArray(), "\t", 0, endIndex);
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/code/Storm/storm-kafka-integration/src/main/java/com/heibaiying/kafka/write/DataSourceSpout.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.kafka.write;
2 |
3 | import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
4 | import org.apache.storm.spout.SpoutOutputCollector;
5 | import org.apache.storm.task.TopologyContext;
6 | import org.apache.storm.topology.OutputFieldsDeclarer;
7 | import org.apache.storm.topology.base.BaseRichSpout;
8 | import org.apache.storm.tuple.Fields;
9 | import org.apache.storm.tuple.Values;
10 | import org.apache.storm.utils.Utils;
11 |
12 | import java.util.*;
13 |
14 | /**
15 | * Spout that produces sample data for the word count
16 | */
17 | public class DataSourceSpout extends BaseRichSpout {
18 |
19 | private List<String> list = Arrays.asList("Spark", "Hadoop", "HBase", "Storm", "Flink", "Hive");
20 |
21 | private SpoutOutputCollector spoutOutputCollector;
22 |
23 | @Override
24 | public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
25 | this.spoutOutputCollector = spoutOutputCollector;
26 | }
27 |
28 | @Override
29 | public void nextTuple() {
30 | // Produce simulated data
31 | String lineData = productData();
32 | spoutOutputCollector.emit(new Values("key",lineData));
33 | Utils.sleep(1000);
34 | }
35 |
36 | @Override
37 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
38 | outputFieldsDeclarer.declare( new Fields("key", "message"));
39 | }
40 |
41 |
42 | /**
43 | * Generates a line of simulated data
44 | */
45 | private String productData() {
46 | Collections.shuffle(list);
47 | Random random = new Random();
48 | int endIndex = random.nextInt(list.size()) % (list.size()) + 1;
49 | return StringUtils.join(list.toArray(), "\t", 0, endIndex);
50 | }
51 |
52 | }
--------------------------------------------------------------------------------
/code/Phoenix/spring-mybatis-phoenix/src/test/java/com/heibaiying/dao/PopulationDaoTest.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.dao;
2 |
3 | import com.heibaiying.bean.USPopulation;
4 | import org.junit.Test;
5 | import org.junit.runner.RunWith;
6 | import org.springframework.beans.factory.annotation.Autowired;
7 | import org.springframework.test.context.ContextConfiguration;
8 | import org.springframework.test.context.junit4.SpringRunner;
9 |
10 | import java.util.List;
11 |
12 | @RunWith(SpringRunner.class)
13 | @ContextConfiguration({"classpath:springApplication.xml"})
14 | public class PopulationDaoTest {
15 |
16 | @Autowired
17 | private PopulationDao populationDao;
18 |
19 | @Test
20 | public void queryAll() {
21 | List<USPopulation> USPopulationList = populationDao.queryAll();
22 | if (USPopulationList != null) {
23 | for (USPopulation USPopulation : USPopulationList) {
24 | System.out.println(USPopulation.getCity() + " " + USPopulation.getPopulation());
25 | }
26 | }
27 | }
28 |
29 | @Test
30 | public void save() {
31 | populationDao.save(new USPopulation("TX", "Dallas", 66666));
32 | USPopulation usPopulation = populationDao.queryByStateAndCity("TX", "Dallas");
33 | System.out.println(usPopulation);
34 | }
35 |
36 | @Test
37 | public void update() {
38 | populationDao.save(new USPopulation("TX", "Dallas", 99999));
39 | USPopulation usPopulation = populationDao.queryByStateAndCity("TX", "Dallas");
40 | System.out.println(usPopulation);
41 | }
42 |
43 |
44 | @Test
45 | public void delete() {
46 | populationDao.deleteByStateAndCity("TX", "Dallas");
47 | USPopulation usPopulation = populationDao.queryByStateAndCity("TX", "Dallas");
48 | System.out.println(usPopulation);
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/code/Hadoop/hadoop-word-count/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>hadoop-word-count</artifactId>
9 |     <version>1.0</version>
10 |
11 |     <build>
12 |         <plugins>
13 |             <plugin>
14 |                 <groupId>org.apache.maven.plugins</groupId>
15 |                 <artifactId>maven-compiler-plugin</artifactId>
16 |                 <configuration>
17 |                     <source>8</source>
18 |                     <target>8</target>
19 |                 </configuration>
20 |             </plugin>
21 |         </plugins>
22 |     </build>
23 |
24 |     <properties>
25 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
26 |         <hadoop.version>2.6.0-cdh5.15.2</hadoop.version>
27 |     </properties>
28 |
29 |     <repositories>
30 |         <repository>
31 |             <id>cloudera</id>
32 |             <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
33 |         </repository>
34 |     </repositories>
35 |
36 |     <dependencies>
37 |         <dependency>
38 |             <groupId>org.apache.hadoop</groupId>
39 |             <artifactId>hadoop-client</artifactId>
40 |             <version>${hadoop.version}</version>
41 |         </dependency>
42 |         <dependency>
43 |             <groupId>org.apache.commons</groupId>
44 |             <artifactId>commons-lang3</artifactId>
45 |             <version>3.8.1</version>
46 |         </dependency>
47 |     </dependencies>
48 | </project>
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/consumers/ConsumerGroup.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.consumers;
2 |
3 | import org.apache.kafka.clients.consumer.ConsumerRecord;
4 | import org.apache.kafka.clients.consumer.ConsumerRecords;
5 | import org.apache.kafka.clients.consumer.KafkaConsumer;
6 |
7 | import java.time.Duration;
8 | import java.time.temporal.ChronoUnit;
9 | import java.util.Collections;
10 | import java.util.Properties;
11 |
12 |
13 | /**
14 | * Kafka consumer and consumer group example
15 | */
16 | public class ConsumerGroup {
17 |
18 | public static void main(String[] args) {
19 | String topic = "Hello-Kafka";
20 | String group = "group1";
21 | Properties props = new Properties();
22 | props.put("bootstrap.servers", "hadoop001:9092");
23 | /* Specify the consumer group id */
24 | props.put("group.id", group);
25 | props.put("enable.auto.commit", true);
26 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
27 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
28 | KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
29 |
30 | /* Subscribe to the topic(s) */
31 | consumer.subscribe(Collections.singletonList(topic));
32 |
33 | try {
34 | while (true) {
35 | /* Poll for new records */
36 | ConsumerRecords<String, String> records = consumer.poll(Duration.of(100, ChronoUnit.MILLIS));
37 | for (ConsumerRecord<String, String> record : records) {
38 | System.out.printf("topic = %s,partition = %d, key = %s, value = %s, offset = %d,\n",
39 | record.topic(), record.partition(), record.key(), record.value(), record.offset());
40 | }
41 | }
42 | } finally {
43 | consumer.close();
44 | }
45 |
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/code/Phoenix/spring-boot-mybatis-phoenix/src/test/java/com/heibaiying/springboot/PopulationTest.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.springboot;
2 |
3 | import com.heibaiying.springboot.bean.USPopulation;
4 | import com.heibaiying.springboot.dao.PopulationDao;
5 | import org.junit.Test;
6 | import org.junit.runner.RunWith;
7 | import org.springframework.beans.factory.annotation.Autowired;
8 | import org.springframework.boot.test.context.SpringBootTest;
9 | import org.springframework.test.context.junit4.SpringRunner;
10 |
11 | import java.util.List;
12 |
13 | @RunWith(SpringRunner.class)
14 | @SpringBootTest
15 | public class PopulationTest {
16 |
17 | @Autowired
18 | private PopulationDao populationDao;
19 |
20 | @Test
21 | public void queryAll() {
22 | List<USPopulation> USPopulationList = populationDao.queryAll();
23 | if (USPopulationList != null) {
24 | for (USPopulation USPopulation : USPopulationList) {
25 | System.out.println(USPopulation.getCity() + " " + USPopulation.getPopulation());
26 | }
27 | }
28 | }
29 |
30 | @Test
31 | public void save() {
32 | populationDao.save(new USPopulation("TX", "Dallas", 66666));
33 | USPopulation usPopulation = populationDao.queryByStateAndCity("TX", "Dallas");
34 | System.out.println(usPopulation);
35 | }
36 |
37 | @Test
38 | public void update() {
39 | populationDao.save(new USPopulation("TX", "Dallas", 99999));
40 | USPopulation usPopulation = populationDao.queryByStateAndCity("TX", "Dallas");
41 | System.out.println(usPopulation);
42 | }
43 |
44 |
45 | @Test
46 | public void delete() {
47 | populationDao.deleteByStateAndCity("TX", "Dallas");
48 | USPopulation usPopulation = populationDao.queryByStateAndCity("TX", "Dallas");
49 | System.out.println(usPopulation);
50 | }
51 |
52 | }
53 |
54 |
--------------------------------------------------------------------------------
/notes/资料分享与工具推荐.md:
--------------------------------------------------------------------------------
1 | Here are some learning resources and development tools that I found helpful during my own studies.
2 |
3 |
4 |
5 | ## :book: Classic Books
6 |
7 | - [Hadoop: The Definitive Guide (4th Edition)](https://book.douban.com/subject/27115351/) 2017
8 | - [Kafka: The Definitive Guide](https://book.douban.com/subject/27665114/) 2017
9 | - [From Paxos to Zookeeper: Principles and Practice of Distributed Consistency](https://book.douban.com/subject/26292004/) 2015
10 | - [Spark Internals: An In-Depth Analysis of the Spark Kernel Architecture and Implementation](https://book.douban.com/subject/26649141/) 2015
11 | - [Spark: The Definitive Guide](https://book.douban.com/subject/27035127/) 2018
12 | - [HBase: The Definitive Guide](https://book.douban.com/subject/10748460/) 2012
13 | - [Programming Hive](https://book.douban.com/subject/25791255/) 2013
14 | - [Scala for the Impatient (2nd Edition)](https://book.douban.com/subject/27093751/) 2017
15 | - [Programming in Scala](https://book.douban.com/subject/27591387/) 2018
16 |
17 |
18 |
19 | ## :computer: Official Documentation
20 |
21 | The publication dates are listed above, and most of these books are now fairly dated. They remain classics, but many of them lag well behind the current software versions. The **official documentation** of each framework is therefore the recommended first choice of learning material. The official docs of the big data frameworks are comprehensive and explain every topic concisely. Take the [Spark RDD programming guide](https://spark.apache.org/docs/latest/rdd-programming-guide.html) as an example: it offers clear topic navigation, and every example is given in Java, Scala and Python; apart from the official documentation, few books manage that.
22 |
23 |
24 |
25 | ## :orange_book: Recommended Blogs
26 |
27 | - An opinionated HBase/Spark/BigData blog: http://hbasefly.com/
28 | - The design and implementation of Apache Spark: https://github.com/JerryLead/SparkInternals
29 | - Jark's Blog, Flink article series: http://wuchong.me/categories/Flink/
30 |
31 |
32 |
33 | ## :triangular_ruler: Development Tools
34 |
35 | #### 1. VirtualBox
36 |
37 | An open-source, free virtual machine manager. Although lightweight, it is feature-rich and covers essentially all common needs.
38 |
39 | Official site: https://www.virtualbox.org/
40 |
41 | #### 2. MobaXterm
42 |
43 | Big data frameworks are usually deployed on servers, and MobaXterm is recommended for connecting to them. It is likewise free and open source, supports multiple connection protocols, drag-and-drop file upload, and plugin extensions.
44 |
45 | Official site: https://mobaxterm.mobatek.net/
46 |
47 | #### 3. Translate Man
48 |
49 | Translate Man is a browser translation extension (available for both Chrome and Firefox). It uses Google's translation API, is highly accurate, supports translating selected text, and is a handy aid when reading official documentation.
50 |
51 | #### 4. ProcessOn
52 |
53 | ProcessOn is an online diagramming platform. It is very convenient to use and works well for drawing figures for notes and blog posts.
54 |
55 | Official site: https://www.processon.com/
56 |
57 |
--------------------------------------------------------------------------------
/resources/json/emp.json:
--------------------------------------------------------------------------------
1 | {"EMPNO": 7369,"ENAME": "SMITH","JOB": "CLERK","MGR": 7902,"HIREDATE": "1980-12-17 00:00:00","SAL": 800.00,"COMM": null,"DEPTNO": 20}
2 | {"EMPNO": 7499,"ENAME": "ALLEN","JOB": "SALESMAN","MGR": 7698,"HIREDATE": "1981-02-20 00:00:00","SAL": 1600.00,"COMM": 300.00,"DEPTNO": 30}
3 | {"EMPNO": 7521,"ENAME": "WARD","JOB": "SALESMAN","MGR": 7698,"HIREDATE": "1981-02-22 00:00:00","SAL": 1250.00,"COMM": 500.00,"DEPTNO": 30}
4 | {"EMPNO": 7566,"ENAME": "JONES","JOB": "MANAGER","MGR": 7839,"HIREDATE": "1981-04-02 00:00:00","SAL": 2975.00,"COMM": null,"DEPTNO": 20}
5 | {"EMPNO": 7654,"ENAME": "MARTIN","JOB": "SALESMAN","MGR": 7698,"HIREDATE": "1981-09-28 00:00:00","SAL": 1250.00,"COMM": 1400.00,"DEPTNO": 30}
6 | {"EMPNO": 7698,"ENAME": "BLAKE","JOB": "MANAGER","MGR": 7839,"HIREDATE": "1981-05-01 00:00:00","SAL": 2850.00,"COMM": null,"DEPTNO": 30}
7 | {"EMPNO": 7782,"ENAME": "CLARK","JOB": "MANAGER","MGR": 7839,"HIREDATE": "1981-06-09 00:00:00","SAL": 2450.00,"COMM": null,"DEPTNO": 10}
8 | {"EMPNO": 7788,"ENAME": "SCOTT","JOB": "ANALYST","MGR": 7566,"HIREDATE": "1987-04-19 00:00:00","SAL": 1500.00,"COMM": null,"DEPTNO": 20}
9 | {"EMPNO": 7839,"ENAME": "KING","JOB": "PRESIDENT","MGR": null,"HIREDATE": "1981-11-17 00:00:00","SAL": 5000.00,"COMM": null,"DEPTNO": 10}
10 | {"EMPNO": 7844,"ENAME": "TURNER","JOB": "SALESMAN","MGR": 7698,"HIREDATE": "1981-09-08 00:00:00","SAL": 1500.00,"COMM": 0.00,"DEPTNO": 30}
11 | {"EMPNO": 7876,"ENAME": "ADAMS","JOB": "CLERK","MGR": 7788,"HIREDATE": "1987-05-23 00:00:00","SAL": 1100.00,"COMM": null,"DEPTNO": 20}
12 | {"EMPNO": 7900,"ENAME": "JAMES","JOB": "CLERK","MGR": 7698,"HIREDATE": "1981-12-03 00:00:00","SAL": 950.00,"COMM": null,"DEPTNO": 30}
13 | {"EMPNO": 7902,"ENAME": "FORD","JOB": "ANALYST","MGR": 7566,"HIREDATE": "1981-12-03 00:00:00","SAL": 3000.00,"COMM": null,"DEPTNO": 20}
14 | {"EMPNO": 7934,"ENAME": "MILLER","JOB": "CLERK","MGR": 7782,"HIREDATE": "1982-01-23 00:00:00","SAL": 1300.00,"COMM": null,"DEPTNO": 10}
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/consumers/ConsumerASynAndSyn.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.consumers;
2 |
3 | import org.apache.kafka.clients.consumer.ConsumerRecord;
4 | import org.apache.kafka.clients.consumer.ConsumerRecords;
5 | import org.apache.kafka.clients.consumer.KafkaConsumer;
6 |
7 | import java.time.Duration;
8 | import java.time.temporal.ChronoUnit;
9 | import java.util.Collections;
10 | import java.util.Properties;
11 |
12 | /**
13 | * Kafka consumer: asynchronous commits with a final synchronous commit
14 | */
15 | public class ConsumerASynAndSyn {
16 |
17 | public static void main(String[] args) {
18 | String topic = "Hello-Kafka";
19 | String group = "group1";
20 | Properties props = new Properties();
21 | props.put("bootstrap.servers", "hadoop001:9092");
22 | props.put("group.id", group);
23 | props.put("enable.auto.commit", false);
24 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
25 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
26 | KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
27 |
28 | consumer.subscribe(Collections.singletonList(topic));
29 |
30 | try {
31 | while (true) {
32 | ConsumerRecords<String, String> records = consumer.poll(Duration.of(100, ChronoUnit.MILLIS));
33 | for (ConsumerRecord<String, String> record : records) {
34 | System.out.println(record);
35 | }
36 | // Commit offsets asynchronously
37 | consumer.commitAsync();
38 | }
39 | } catch (Exception e) {
40 | e.printStackTrace();
41 | } finally {
42 | try {
43 | // The consumer is about to be closed, so use a synchronous commit to make sure the offsets are committed
44 | consumer.commitSync();
45 | } finally {
46 | consumer.close();
47 | }
48 | }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/code/Storm/storm-word-count/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.heibaiying</groupId>
8 |     <artifactId>storm-word-count</artifactId>
9 |     <version>1.0</version>
10 |
11 |     <build>
12 |         <plugins>
13 |             <plugin>
14 |                 <groupId>org.apache.maven.plugins</groupId>
15 |                 <artifactId>maven-compiler-plugin</artifactId>
16 |                 <configuration>
17 |                     <source>8</source>
18 |                     <target>8</target>
19 |                 </configuration>
20 |             </plugin>
21 |             <plugin>
22 |                 <artifactId>maven-assembly-plugin</artifactId>
23 |                 <configuration>
24 |                     <descriptors>
25 |                         <descriptor>src/main/resources/assembly.xml</descriptor>
26 |                     </descriptors>
27 |                     <archive>
28 |                         <manifest>
29 |                             <mainClass>com.heibaiying.wordcount.ClusterWordCountApp</mainClass>
30 |                         </manifest>
31 |                     </archive>
32 |                 </configuration>
33 |             </plugin>
34 |         </plugins>
35 |     </build>
36 |
37 |     <dependencies>
38 |         <dependency>
39 |             <groupId>org.apache.storm</groupId>
40 |             <artifactId>storm-core</artifactId>
41 |             <version>1.2.2</version>
42 |         </dependency>
43 |         <dependency>
44 |             <groupId>org.apache.commons</groupId>
45 |             <artifactId>commons-lang3</artifactId>
46 |             <version>3.8.1</version>
47 |         </dependency>
48 |     </dependencies>
49 | </project>
--------------------------------------------------------------------------------
/notes/installation/Storm单机环境搭建.md:
--------------------------------------------------------------------------------
1 | # Storm Standalone Environment Setup
2 |
3 | ### 1. Environment Requirements
4 |
5 | > you need to install Storm's dependencies on Nimbus and the worker machines. These are:
6 | >
7 | > 1. Java 7+ (Apache Storm 1.x is tested through travis ci against both java 7 and java 8 JDKs)
8 | > 2. Python 2.6.6 (Python 3.x should work too, but is not tested as part of our CI enviornment)
9 |
10 | According to the [official documentation](http://storm.apache.org/releases/1.2.2/Setting-up-a-Storm-cluster.html), Storm depends on Java 7+ and Python 2.6.6+, so both need to be installed in advance. Since several frameworks depend on these two, their installation steps are documented separately:
11 |
12 | + [Installing JDK on Linux](https://github.com/heibaiying/BigData-Notes/blob/master/notes/installation/Linux下JDK安装.md)
13 |
14 | + [Installing Python on Linux](https://github.com/heibaiying/BigData-Notes/blob/master/notes/installation/Linux下Python安装.md)
15 |
16 |
17 |
18 | ### 2. Download and Extract
19 |
20 | Download and extract the release. Official download address: http://storm.apache.org/downloads.html
21 |
22 | ```shell
23 | # tar -zxvf apache-storm-1.2.2.tar.gz
24 | ```
25 |
26 | ### 3. Configure Environment Variables
27 |
28 | ```shell
29 | # vim /etc/profile
30 | ```
31 |
32 | Add the environment variables:
33 |
34 | ```shell
35 | export STORM_HOME=/usr/app/apache-storm-1.2.2
36 | export PATH=$STORM_HOME/bin:$PATH
37 | ```
38 |
39 | Make the new environment variables take effect:
40 |
41 | ```shell
42 | # source /etc/profile
43 | ```
44 |
45 |
46 |
47 | ### 4. Start the Storm Processes
48 |
49 | Since several processes have to be started, they are all launched as background processes. Go to the `${STORM_HOME}/bin` directory and run the following commands in order:
50 |
51 | ```shell
52 | # Start zookeeper
53 | nohup sh storm dev-zookeeper &
54 | # Start the master node (nimbus)
55 | nohup sh storm nimbus &
56 | # Start the worker node (supervisor)
57 | nohup sh storm supervisor &
58 | # Start the web UI (ui)
59 | nohup sh storm ui &
60 | # Start the log viewing service (logviewer)
61 | nohup sh storm logviewer &
62 | ```
63 |
64 |
65 |
66 | ### 5. Verify the Startup
67 |
68 | Method 1: use jps to check the processes:
69 |
70 | ```shell
71 | [root@hadoop001 app]# jps
72 | 1074 nimbus
73 | 1283 Supervisor
74 | 620 dev_zookeeper
75 | 1485 core
76 | 9630 logviewer
77 | ```
78 |
79 | Method 2: visit port 8080 and check the Web UI:
80 |
81 |
82 |
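83 | ### 6. Submitting a Test Topology (supplementary)
84 |
85 | Once all the processes above are running, the installation can also be verified by submitting a topology. A minimal sketch, assuming the word-count example from this repository has already been packaged into a jar (the jar file name below is a placeholder):
86 |
87 | ```shell
88 | # Submit the packaged topology to the local Storm installation
89 | storm jar storm-word-count-1.0.jar com.heibaiying.wordcount.ClusterWordCountApp
90 | # List the running topologies; the newly submitted one should appear in the output
91 | storm list
92 | ```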
--------------------------------------------------------------------------------
/code/Flink/flink-state-management/src/main/java/com/heibaiying/keyedstate/ThresholdWarning.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.keyedstate;
2 |
3 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
4 | import org.apache.flink.api.common.state.ListState;
5 | import org.apache.flink.api.common.state.ListStateDescriptor;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.configuration.Configuration;
8 | import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
9 | import org.apache.flink.util.Collector;
10 |
11 | import java.util.ArrayList;
12 | import java.util.List;
13 |
14 | public class ThresholdWarning extends RichFlatMapFunction<Tuple2<String, Long>, Tuple2<String, List<Long>>> {
15 |
16 | // ListState used to buffer the abnormal values
17 | private transient ListState<Long> abnormalData;
18 | // Threshold to monitor
19 | private Long threshold;
20 | // Number of times the threshold must be exceeded before an alert is emitted
21 | private Integer numberOfTimes;
22 |
23 | ThresholdWarning(Long threshold, Integer numberOfTimes) {
24 | this.threshold = threshold;
25 | this.numberOfTimes = numberOfTimes;
26 | }
27 |
28 | @Override
29 | public void open(Configuration parameters) {
30 | // Obtain the state instance by its name (handle); it is created automatically if it does not exist yet
31 | abnormalData = getRuntimeContext().getListState(new ListStateDescriptor<>("abnormalData", Long.class));
32 | }
33 |
34 | @Override
35 | public void flatMap(Tuple2<String, Long> value, Collector<Tuple2<String, List<Long>>> out) throws Exception {
36 | Long inputValue = value.f1;
37 | // If the input value exceeds the threshold, record this abnormal value
38 | if (inputValue >= threshold) {
39 | abnormalData.add(inputValue);
40 | }
41 | ArrayList<Long> list = Lists.newArrayList(abnormalData.get().iterator());
42 | // If abnormal values have occurred the configured number of times, emit an alert
43 | if (list.size() >= numberOfTimes) {
44 | out.collect(Tuple2.of(value.f0 + " exceeded the specified threshold ", list));
45 | // Clear the buffered state once the alert has been emitted
46 | abnormalData.clear();
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/notes/大数据常用软件安装指南.md:
--------------------------------------------------------------------------------
1 | ## Installation Guide for Common Big Data Software
2 |
3 | For easier reference, the installation guides for all of the software used in this repository are collected below:
4 |
5 | ### 1. Basic Software
6 |
7 | 1. [JDK installation on Linux](installation/Linux下JDK安装.md)
8 | 2. [Python installation on Linux](installation/Linux下Python安装.md)
9 | 3. [Static IP and multi-IP configuration for virtual machines](installation/虚拟机静态IP及多IP配置.md)
10 |
11 | ### 2. Hadoop
12 |
13 | 1. [Hadoop single-node setup](installation/Hadoop单机环境搭建.md)
14 | 2. [Hadoop cluster setup](installation/Hadoop集群环境搭建.md)
15 | 3. [Hadoop high-availability cluster based on Zookeeper](installation/基于Zookeeper搭建Hadoop高可用集群.md)
16 |
17 | ### 3. Spark
18 |
19 | 1. [Spark development environment setup](installation/Spark开发环境搭建.md)
20 | 2. [Spark high-availability cluster based on Zookeeper](installation/Spark集群环境搭建.md)
21 |
22 | ### 4. Flink
23 |
24 | 1. [Flink Standalone cluster deployment](installation/Flink_Standalone_Cluster.md)
25 |
26 | ### 5. Storm
27 |
28 | 1. [Storm single-node setup](installation/Storm单机环境搭建.md)
29 | 2. [Storm cluster setup](installation/Storm集群环境搭建.md)
30 |
31 | ### 6. HBase
32 |
33 | 1. [HBase single-node setup](installation/HBase单机环境搭建.md)
34 | 2. [HBase cluster setup](installation/HBase集群环境搭建.md)
35 |
36 | ### 7. Flume
37 |
38 | 1. [Flume installation and deployment on Linux](installation/Linux下Flume的安装.md)
39 |
40 | ### 8. Azkaban
41 |
42 | 1. [Building and deploying Azkaban 3.x](installation/Azkaban_3.x_编译及部署.md)
43 |
44 | ### 9. Hive
45 |
46 | 1. [Hive installation and deployment on Linux](installation/Linux环境下Hive的安装部署.md)
47 |
48 | ### 10. Zookeeper
49 |
50 | 1. [Zookeeper single-node and cluster setup](installation/Zookeeper单机环境和集群环境搭建.md)
51 |
52 | ### 11. Kafka
53 |
54 | 1. [Kafka high-availability cluster based on Zookeeper](installation/基于Zookeeper搭建Kafka高可用集群.md)
55 |
56 |
57 | ### A Note on Versions
58 |
59 | Because the original Apache Hadoop release packages have poor compatibility with one another, this repository uses the **CDH** (Cloudera's Distribution, including Apache Hadoop) packages unless there is a specific reason not to. CDH is built from stable Apache Hadoop releases, has been compatibility-tested, and is currently the most widely used distribution in production.
60 |
61 | The latest CDH 5 packages can be downloaded from http://archive.cloudera.com/cdh5/cdh/5/ . The page is very large and loads slowly, so wait patiently for it to finish loading. The first half of the page contains documentation links; the second half contains the actual packages. All frameworks within the same CDH release are integration-tested against each other, so there are no JAR conflicts between them. Package names usually look like the ones below, where the CDH version is `5.15.2` and the prefix is each component's own version. To avoid unnecessary JAR conflicts, **make sure the CDH version is identical across all components**.
62 |
63 | ```shell
64 | hadoop-2.6.0-cdh5.15.2.tar.gz
65 | hbase-1.2.0-cdh5.15.2
66 | hive-1.1.0-cdh5.15.2.tar.gz
67 | ```
68 |
--------------------------------------------------------------------------------
/code/Phoenix/spring-mybatis-phoenix/src/main/resources/springApplication.xml:
--------------------------------------------------------------------------------
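The XML body of this file was not preserved here. As a hedged sketch only: a typical Spring + MyBatis + Phoenix application context wires up a Phoenix JDBC DataSource, an SqlSessionFactory, and mapper scanning. Every concrete value below (JDBC URL, bean ids, base package) is an assumption for illustration, not this repository's actual configuration.

```xml
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans.xsd">

    <!-- Phoenix JDBC DataSource; the Zookeeper quorum address is an assumption -->
    <bean id="dataSource" class="org.springframework.jdbc.datasource.DriverManagerDataSource">
        <property name="driverClassName" value="org.apache.phoenix.jdbc.PhoenixDriver"/>
        <property name="url" value="jdbc:phoenix:hadoop001:2181"/>
    </bean>

    <!-- MyBatis SqlSessionFactory backed by the Phoenix DataSource -->
    <bean id="sqlSessionFactory" class="org.mybatis.spring.SqlSessionFactoryBean">
        <property name="dataSource" ref="dataSource"/>
    </bean>

    <!-- scan for MyBatis mapper interfaces; the base package is an assumption -->
    <bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
        <property name="basePackage" value="com.heibaiying.dao"/>
        <property name="sqlSessionFactoryBeanName" value="sqlSessionFactory"/>
    </bean>
</beans>
```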
--------------------------------------------------------------------------------
/code/spark/spark-streaming-kafka/src/main/scala/com/heibaiying/kafka/KafkaDirectStream.scala:
--------------------------------------------------------------------------------
1 | package com.heibaiying.kafka
2 |
3 | import org.apache.kafka.common.serialization.StringDeserializer
4 | import org.apache.spark.SparkConf
5 | import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
6 | import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
7 | import org.apache.spark.streaming.kafka010._
8 | import org.apache.spark.streaming.{Seconds, StreamingContext}
9 |
10 |
11 | /**
12 |   * Spark Streaming integration with Kafka (direct stream)
13 |   */
14 | object KafkaDirectStream {
15 |
16 |
17 | def main(args: Array[String]): Unit = {
18 |
19 | val sparkConf = new SparkConf().setAppName("KafkaDirectStream").setMaster("local[2]")
20 | val streamingContext = new StreamingContext(sparkConf, Seconds(5))
21 |
22 | val kafkaParams = Map[String, Object](
23 | /*
24 |  * The list of broker addresses. It does not need to contain every broker in the cluster:
25 |  * the client discovers the remaining brokers from the ones given here, but providing at least two is recommended for fault tolerance.
26 |  */
27 | "bootstrap.servers" -> "hadoop001:9092",
28 | /* key deserializer */
29 | "key.deserializer" -> classOf[StringDeserializer],
30 | /* value deserializer */
31 | "value.deserializer" -> classOf[StringDeserializer],
32 | /* ID of the consumer group this consumer belongs to */
33 | "group.id" -> "spark-streaming-group",
34 | /*
35 |  * Controls what the consumer does when it reads a partition that has no committed offset, or whose committed offset is invalid:
36 |  * latest: start from the most recent records (those produced after the consumer started)
37 |  * earliest: start from the beginning of the partition
38 |  */
39 | "auto.offset.reset" -> "latest",
40 | /* whether to commit offsets automatically */
41 | "enable.auto.commit" -> (true: java.lang.Boolean)
42 | )
43 |
44 | val topics = Array("spark-streaming-topic")
45 | val stream = KafkaUtils.createDirectStream[String, String](
46 | streamingContext,
47 | PreferConsistent,
48 | Subscribe[String, String](topics, kafkaParams)
49 | )
50 |
51 | /* print the input stream */
52 | stream.map(record => (record.key, record.value)).print()
53 |
54 | streamingContext.start()
55 | streamingContext.awaitTermination()
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/notes/Azkaban简介.md:
--------------------------------------------------------------------------------
1 | # Introduction to Azkaban
2 |
3 |
4 | ## 1. About Azkaban
5 |
6 | #### 1.1 Background
7 |
8 | A complete big data analytics system is inevitably made up of many task units (data collection, data cleansing, data storage, data analysis, and so on), and these units together with the dependencies between them form a complex workflow. Managing such a workflow raises many questions:
9 |
10 | - How do you schedule a task to run at a fixed time?
11 | - How do you run a task only after another task has finished?
12 | - How do you send an alert when a task fails?
13 | - ......
14 |
15 | Workflow schedulers were created to answer these questions, and Azkaban is one of them.
16 |
17 | #### 1.2 Features
18 |
19 | Azkaban originated at LinkedIn and has been proven by years of production use. Its main features are:
20 |
21 | - Compatible with any version of Hadoop
22 | - Easy-to-use web UI
23 | - Workflows can be uploaded through a simple web page
24 | - Independent management per project
25 | - Scheduled task execution
26 | - Modular and pluggable
27 | - Authentication and authorization
28 | - Tracking of user actions
29 | - Email alerts on failure and success
30 | - SLA alerts and automatic killing of failed jobs
31 | - Retrying of failed jobs
32 |
33 | Azkaban is designed to be easy to use while still covering all of the above; its pages are clean and clear. Below is its web UI:
34 |
35 |
36 |
37 | ## 2. Azkaban and Oozie
38 |
39 | Azkaban and Oozie are currently the most widely used workflow schedulers. Their main differences are as follows:
40 |
41 | #### Functionality
42 |
43 | - Both can schedule workflow tasks such as Linux commands, MapReduce, Spark, Pig, Java, and Hive;
44 | - Both can run workflow tasks on a schedule.
45 |
46 | #### Workflow definition
47 |
48 | - Azkaban defines workflows with Properties files (Flow 1.0) or YAML files (Flow 2.0); a minimal Flow 2.0 example is shown after this list;
49 | - Oozie describes workflows in the Hadoop Process Definition Language (HPDL), an XML-based process definition language.
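
As an illustration (a minimal sketch, not a file from this repository), a Flow 2.0 workflow is a `.flow` YAML file that lists the jobs and the dependencies between them:

```yaml
nodes:
  - name: jobA
    type: command
    config:
      command: echo "run jobA first"

  - name: jobB
    type: command
    # jobB starts only after jobA has finished successfully
    dependsOn:
      - jobA
    config:
      command: echo "run jobB after jobA"
```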
50 |
51 | #### Permission control
52 |
53 | - Azkaban has fairly strict permission control, such as users' read/write/execute rights on a workflow;
54 | - Oozie currently has no strict permission control.
55 |
56 | #### Deployment modes
57 |
58 | + Azkaban 3.x offers two deployment modes:
59 |   + **solo server mode**: metadata is stored in the embedded H2 database by default (MySQL can be used instead); the `webServer` (management server) and `executorServer` (execution server) run in the same process, named `AzkabanSingleServer`. This mode is suitable for scheduling small workflows.
60 |   + **multiple-executor mode**: metadata is stored in MySQL, which should be deployed in a master/slave setup for backup and fault tolerance. The `webServer` and `executorServer` run in separate processes and do not affect each other, making this mode suitable for production.
61 |
62 | + Oozie serves its web pages from a web container such as Tomcat and stores workflow metadata in Derby by default; because Derby is too lightweight, MySQL is usually used instead in practice.
63 |
64 |
65 |
66 |
67 |
68 | ## 3. Summary
69 |
70 | If your workflows are not especially complex, the lightweight Azkaban is recommended, mainly for the following reasons:
71 |
72 | + **Installation**: before 3.0, Azkaban shipped as a binary package that only needed to be extracted and deployed. From 3.0 onward it must be built from source, but the build is Gradle-based and highly automated;
73 | + **UI design**: task dependencies, execution results, and execution logs can all be viewed directly in the web UI;
74 | + **Configuration**: Azkaban Flow 1.0 defines workflows with Properties files, which is somewhat limiting; Flow 2.0 adds YAML support. YAML is simpler and more flexible; the well-known microservice framework Spring Boot, for example, adopted it in place of heavyweight XML.
75 |
76 |
77 |
--------------------------------------------------------------------------------
/code/Hbase/hbase-observer-coprocessor/src/main/java/com/heibaiying/AppendRegionObserver.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying;
2 |
3 | import org.apache.hadoop.hbase.Cell;
4 | import org.apache.hadoop.hbase.CellUtil;
5 | import org.apache.hadoop.hbase.client.Durability;
6 | import org.apache.hadoop.hbase.client.Get;
7 | import org.apache.hadoop.hbase.client.Put;
8 | import org.apache.hadoop.hbase.client.Result;
9 | import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
10 | import org.apache.hadoop.hbase.coprocessor.ObserverContext;
11 | import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
12 | import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
13 | import org.apache.hadoop.hbase.util.Bytes;
14 |
15 | import java.io.IOException;
16 | import java.util.List;
17 |
18 | /**
19 |  * When a put targets the same article:content cell, append the newly inserted value to the end of the existing value
20 |  */
21 | public class AppendRegionObserver extends BaseRegionObserver {
22 |
23 | private byte[] columnFamily = Bytes.toBytes("article");
24 | private byte[] qualifier = Bytes.toBytes("content");
25 |
26 | @Override
27 | public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit,
28 | Durability durability) throws IOException {
29 | if (put.has(columnFamily, qualifier)) {
30 | // iterate over the query result to get the existing value of the target column
31 | Result rs = e.getEnvironment().getRegion().get(new Get(put.getRow()));
32 | String oldValue = "";
33 | for (Cell cell : rs.rawCells())
34 | if (CellUtil.matchingColumn(cell, columnFamily, qualifier)) {
35 | oldValue = Bytes.toString(CellUtil.cloneValue(cell));
36 | }
37 |
38 | // get the value that is being newly inserted into the target column
39 | List<Cell> cells = put.get(columnFamily, qualifier);
40 | String newValue = "";
41 | for (Cell cell : cells) {
42 | if (CellUtil.matchingColumn(cell, columnFamily, qualifier)) {
43 | newValue = Bytes.toString(CellUtil.cloneValue(cell));
44 | }
45 | }
46 |
47 | // append: write the old value concatenated with the new value back into the Put
48 | put.addColumn(columnFamily, qualifier, Bytes.toBytes(oldValue + newValue));
49 | }
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/code/Kafka/kafka-basis/src/main/java/com/heibaiying/consumers/ConsumerASyn.java:
--------------------------------------------------------------------------------
1 | package com.heibaiying.consumers;
2 |
3 | import org.apache.kafka.clients.consumer.*;
4 | import org.apache.kafka.common.TopicPartition;
5 |
6 | import java.time.Duration;
7 | import java.time.temporal.ChronoUnit;
8 | import java.util.Collections;
9 | import java.util.Map;
10 | import java.util.Properties;
11 |
12 | /**
13 |  * Kafka consumer: asynchronous offset commit
14 |  */
15 | public class ConsumerASyn {
16 |
17 | public static void main(String[] args) {
18 | String topic = "Hello-Kafka";
19 | String group = "group1";
20 | Properties props = new Properties();
21 | props.put("bootstrap.servers", "hadoop001:9092");
22 | props.put("group.id", group);
23 | props.put("enable.auto.commit", false);
24 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
25 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
26 | KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
27 |
28 | consumer.subscribe(Collections.singletonList(topic));
29 |
30 | try {
31 | while (true) {
32 | ConsumerRecords<String, String> records = consumer.poll(Duration.of(100, ChronoUnit.MILLIS));
33 | for (ConsumerRecord<String, String> record : records) {
34 | System.out.println(record);
35 | }
36 | /* commit offsets asynchronously and register a callback */
37 | consumer.commitAsync(new OffsetCommitCallback() {
38 | @Override
39 | public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception exception) {
40 | if (exception != null) {
41 | System.out.println("commit failed; the offsets below were not committed:");
42 | offsets.forEach((x, y) -> System.out.printf("topic = %s,partition = %d, offset = %s \n",
43 | x.topic(), x.partition(), y.offset()));
44 | }
45 | }
46 | });
47 | }
48 | } finally {
49 | consumer.close();
50 | }
51 |
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
|