├── README.md ├── log └── test.log ├── pom.xml └── src └── main ├── java └── com │ └── tools │ ├── clickhouse │ └── 环境搭建.md │ ├── hadoop │ ├── config │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ ├── mapred-site.xml │ │ └── yarn-site.xml │ ├── hdfs │ │ ├── HDFSClient.java │ │ ├── HDFSStream.java │ │ └── UploadFileToHdfsByCompress.java │ ├── mr │ │ ├── HFileGenerator.java │ │ ├── HFileImportMapper.java │ │ ├── dataclean │ │ │ └── DataClean.java │ │ ├── duplicate │ │ │ └── DuplicateRemoval.java │ │ ├── inputformat │ │ │ ├── SmallFiles2SequenceFile.java │ │ │ ├── SmallFiles2SequenceMapper.java │ │ │ ├── WholeFileInputFormat.java │ │ │ └── WholeRecordReader.java │ │ ├── outputformat │ │ │ ├── MyFileOutputFormat.java │ │ │ ├── MyFileOutputFormatMain.java │ │ │ └── MyRecordWriter.java │ │ ├── secondarysort │ │ │ ├── MyOrder.java │ │ │ ├── MyOrderGroup.java │ │ │ ├── MyOrderMapper.java │ │ │ ├── MyOrderReducer.java │ │ │ ├── MyPartitioner.java │ │ │ └── OrderBean.java │ │ └── wordcount │ │ │ ├── LogCount.java │ │ │ ├── LogCountMapper.java │ │ │ ├── LogCountReducer.java │ │ │ ├── MyPartitioner.java │ │ │ ├── WordCountByPartitioner.java │ │ │ ├── WordCountMapper.java │ │ │ └── WordCountReducer.java │ └── 集群部署文档.md │ ├── hbase │ ├── HBaseFilter.java │ ├── HBaseReadWrite.java │ ├── HBaseTestUtil.java │ ├── HBase读写的几种方式.pdf │ ├── Utils.java │ ├── hdfs2hbase │ │ ├── HDFS2HBase.java │ │ └── scores.txt │ └── processor │ │ ├── HBasePerson.java │ │ └── HBaseProcessor.java │ ├── hive │ ├── MyUDAF.java │ ├── MyUDF.java │ ├── MyUDTF.java │ └── sql │ │ └── test.sql │ ├── kafka │ ├── CustomPartitioner.java │ ├── consumer │ │ ├── ConsumerCommitOffset.java │ │ ├── ConsumerInterceptorTTL.java │ │ ├── ConsumerReBalance.java │ │ └── ConsumerThread.java │ ├── producer │ │ └── ProducerRandomInt.java │ ├── quickstart │ │ ├── ConsumerQuickStart.java │ │ └── ProducerQuickStart.java │ └── readme.md │ ├── redis │ ├── CacheTuning.java │ ├── DistributedTool.java │ ├── lettuce │ │ ├── LettuceTools.java │ │ ├── MyListener.java │ │ ├── PubSubByLettuce.java │ │ ├── QuickStartByLettuce.java │ │ └── TransactionsByLettuce.java │ ├── redisson │ │ ├── RedissonDelayQueue.java │ │ └── RedissonDelayQueueConsumer.java │ ├── redis持久化详解.md │ ├── redis的五大数据类型.md │ └── 分布式数据库与缓存双写一致性方案.md │ └── zookeeper │ ├── discovery │ ├── client │ │ └── DistributeClient.java │ ├── server │ │ └── DistributeServer.java │ └── 服务注册与发现.md │ ├── election │ ├── Broker_1.java │ ├── Broker_2.java │ ├── Broker_3.java │ └── ZkElectionUtil.java │ └── zookeeper选举机制.pdf └── resources └── log4j2.xml /README.md: -------------------------------------------------------------------------------- 1 | ### Hadoop Practice 2 | 3 | - [hadoop集群部署](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/hadoop/%E9%9B%86%E7%BE%A4%E9%83%A8%E7%BD%B2%E6%96%87%E6%A1%A3.md) 4 | - [HDFS](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/hadoop/hdfs) 5 | - [MapReduce](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/hadoop/mr) 6 | 7 | ### HBase Practice 8 | - [HBase读写操作](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/hbase) 9 | - [HBase过滤器](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/hbase/HBaseFilter.java) 10 | - [HBase协处理器](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/hbase/processor/HBaseProcessor.java) 11 | 12 | ### Hive Practice 13 | 14 | 15 | 16 | ### Kafka Practice 17 | 18 | - 
[生产者/消费者QuickStart](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/kafka) 19 | - [自定义消费者消息过期拦截器](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/kafka/consumer/ConsumerInterceptorTTL.java) 20 | - [Kafka ReBalance监听器](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/kafka/consumer/ConsumerReBalance.java) 21 | - [Kafka 消费者多线程消费](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/kafka/consumer/ConsumerThread.java) 22 | 23 | ### Click House 24 | 25 | - [docker部署文档](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/clickhouse/%E7%8E%AF%E5%A2%83%E6%90%AD%E5%BB%BA.md) 26 | 27 | ### Redis Practice 28 | 29 | - [缓存击穿,缓存穿透,缓存雪崩的解决方案](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/redis) 30 | - [分布式数据库与缓存双写一致性解决方案](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/redis) 31 | - [使用redis简单正确实现分布式锁](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/redis) 32 | - [使用Lettuce作为redis客户端实例](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/redis) 33 | - [redis持久化详解](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/redis) 34 | 35 | ### Zookeeper Practice 36 | 37 | - [基于zookeeper的服务注册与发现](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/zookeeper/discovery) 38 | - [基于zookeeper实现leader选举](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/zookeeper/election) 39 | - [zookeeper的leader选举机制](https://github.com/sev7e0/bigdata-practice/tree/master/src/main/java/com/tools/zookeeper/zookeeper选举机制.pdf) -------------------------------------------------------------------------------- /log/test.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sev7e0/bigdata-practice/ffbdd93bd555fd388d4dd20ccc3379124a3eae5f/log/test.log -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 4 | 4.0.0 5 | com.sev7e0 6 | bigdata-practice 7 | 0 8 | 9 | 10 | 11 | org.apache.maven.plugins 12 | maven-compiler-plugin 13 | 14 | 8 15 | 8 16 | 17 | 18 | 19 | 20 | 21 | 22 | cloudera 23 | https://repository.cloudera.com/artifactory/cloudera-repos/ 24 | 25 | 26 | 27 | 2.7.5 28 | 1.2.6 29 | 2.2.0 30 | 2.10.0 31 | 1.16.20 32 | 3.4.14 33 | 16.0 34 | 5.1.6.RELEASE 35 | 1.7.22 36 | 2.17.0 37 | 2.3.2 38 | 3.1.2 39 | 40 | 41 | 42 | 43 | 44 | org.apache.kafka 45 | kafka_2.12 46 | ${kafka.version} 47 | 48 | 49 | com.fasterxml.jackson.core 50 | * 51 | 52 | 53 | 54 | 55 | 56 | org.apache.hadoop 57 | hadoop-client 58 | ${hadoop.version} 59 | 60 | 61 | io.netty 62 | * 63 | 64 | 65 | slf4j-log4j12 66 | org.slf4j 67 | 68 | 69 | 70 | 71 | 72 | org.apache.hbase 73 | hbase-server 74 | ${hbase.version} 75 | 76 | 77 | io.netty 78 | * 79 | 80 | 81 | slf4j-log4j12 82 | org.slf4j 83 | 84 | 85 | 86 | 87 | org.apache.hbase 88 | hbase-client 89 | ${hbase.version} 90 | 91 | 92 | io.netty 93 | * 94 | 95 | 96 | slf4j-log4j12 97 | org.slf4j 98 | 99 | 100 | 101 | 102 | 103 | 104 | org.apache.hive 105 | hive-exec 106 | ${hive.version} 107 | 108 | 109 | slf4j-log4j12 110 | org.slf4j 111 | 112 | 113 | org.glassfish 114 | javax.el 115 | 116 | 117 | 118 | 119 | org.apache.hive 120 | hive-jdbc 121 | ${hive.version} 122 | 123 | 124 | slf4j-log4j12 125 
| org.slf4j 126 | 127 | 128 | org.glassfish 129 | javax.el 130 | 131 | 132 | 133 | 134 | org.apache.hive 135 | hive-cli 136 | ${hive.version} 137 | 138 | 139 | slf4j-log4j12 140 | org.slf4j 141 | 142 | 143 | org.glassfish 144 | javax.el 145 | 146 | 147 | 148 | 149 | 150 | 151 | org.apache.zookeeper 152 | zookeeper 153 | ${zookeeper.version} 154 | 155 | 156 | io.netty 157 | * 158 | 159 | 160 | slf4j-log4j12 161 | org.slf4j 162 | 163 | 164 | 165 | 166 | org.apache.curator 167 | curator-framework 168 | 4.0.0 169 | 170 | 171 | slf4j-log4j12 172 | org.slf4j 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | redis.clients 185 | jedis 186 | ${redis.verison} 187 | 188 | 189 | 190 | io.lettuce 191 | lettuce-core 192 | ${lettuce.version} 193 | 194 | 195 | slf4j-log4j12 196 | org.slf4j 197 | 198 | 199 | 200 | 201 | org.slf4j 202 | slf4j-api 203 | ${slf4j.version} 204 | 205 | 206 | org.apache.curator 207 | curator-recipes 208 | 4.0.0 209 | 210 | 211 | slf4j-log4j12 212 | org.slf4j 213 | 214 | 215 | 216 | 217 | org.apache.pulsar 218 | pulsar-client 219 | ${pulsar.version} 220 | 221 | 222 | slf4j-log4j12 223 | org.slf4j 224 | 225 | 226 | 227 | 228 | com.alibaba 229 | fastjson 230 | 1.2.33 231 | 232 | 233 | slf4j-log4j12 234 | org.slf4j 235 | 236 | 237 | 238 | 239 | org.glassfish 240 | javax.el 241 | 3.0.1-b06 242 | 243 | 244 | slf4j-log4j12 245 | org.slf4j 246 | 247 | 248 | 249 | 250 | org.redisson 251 | redisson 252 | 3.14.0 253 | 254 | 255 | slf4j-log4j12 256 | org.slf4j 257 | 258 | 259 | 260 | 261 | org.projectlombok 262 | lombok 263 | 1.18.4 264 | 265 | 266 | org.apache.logging.log4j 267 | log4j-core 268 | ${log4j2.version} 269 | 270 | 271 | org.apache.logging.log4j 272 | log4j-api 273 | ${log4j2.version} 274 | 275 | 276 | 277 | -------------------------------------------------------------------------------- /src/main/java/com/tools/clickhouse/环境搭建.md: -------------------------------------------------------------------------------- 1 | ## Click House docker环境搭建 2 | 3 | ### 服务搭建 4 | 5 | ```shell 6 | docker run \ 7 | --name clickhouse-docker \ 8 | -d \ 9 | -p 8123:8123 \ 10 | -p 9000:9000 \ 11 | -p 9009:9009 \ 12 | --ulimit nofile=262144:262144 \ 13 | -v $PWD/data/:/var/lib/clickhouse yandex/clickhouse-server 14 | ``` 15 | 16 | ### 原生客户端搭建 17 | 18 | ```shell 19 | docker run \ 20 | -it --rm --link clickhouse-docker:clickhouse-server \ 21 | yandex/clickhouse-client --host clickhouse-docker 22 | ``` -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/config/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | fs.defaultFS 5 | hdfs://ns1 6 | 7 | 8 | hadoop.tmp.dir 9 | /hadoopadmin/datadir/hadoop/tempDatas 10 | 11 | 12 | 13 | io.file.buffer.size 14 | 4096 15 | 16 | 17 | fs.trash.interval 18 | 10080 19 | 检查点被删除后的分钟数。 如果为零,垃圾桶功能将被禁用。 20 | 该选项可以在服务器和客户端上配置。 如果垃圾箱被禁用服务器端,则检查客户端配置。 21 | 如果在服务器端启用垃圾箱,则会使用服务器上配置的值,并忽略客户端配置值。 22 | 23 | 24 | 25 | ha.zookeeper.quorum 26 | node01:2181,node02:2181,node03:2181 27 | 28 | 29 | fs.trash.checkpoint.interval 30 | 0 31 | 垃圾检查点之间的分钟数。 应该小于或等于fs.trash.interval。 32 | 如果为零,则将该值设置为fs.trash.interval的值。 每次检查指针运行时, 33 | 它都会从当前创建一个新的检查点,并删除比fs.trash.interval更早创建的检查点。 34 | 35 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/config/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dfs.nameservices 5 | ns1 6 | 7 | 8 | 9 | 
dfs.ha.namenodes.ns1 10 | nn1,nn2 11 | 12 | 13 | 14 | dfs.namenode.rpc-address.ns1.nn1 15 | node01:9000 16 | 17 | 18 | 19 | dfs.namenode.http-address.ns1.nn1 20 | node01:50070 21 | 22 | 23 | 24 | dfs.namenode.rpc-address.ns1.nn2 25 | node02:9000 26 | 27 | 28 | 29 | dfs.namenode.http-address.ns1.nn2 30 | node02:50070 31 | 32 | 33 | dfs.namenode.secondary.http-address 34 | node01:50090 35 | 36 | 37 | dfs.namenode.http-address 38 | node01:50070 39 | 40 | 41 | dfs.namenode.name.dir 42 | file:///hadoopadmin/datadir/hadoop/namenodeDatas 43 | 44 | 45 | 46 | dfs.datanode.data.dir 47 | file:///hadoopadmin/datadir/hadoop/datanodeDatas 48 | 49 | 50 | dfs.namenode.edits.dir 51 | file:///hadoopadmin/datadir/hadoop/dfs/nn/edits 52 | 53 | 54 | dfs.namenode.checkpoint.dir 55 | file:///hadoopadmin/datadir/hadoop/dfs/snn/name 56 | 57 | 58 | dfs.namenode.checkpoint.edits.dir 59 | file:///hadoopadmin/datadir/hadoop/dfs/nn/snn/edits 60 | 61 | 62 | dfs.replication 63 | 3 64 | 65 | 66 | dfs.permissions 67 | false 68 | 69 | 70 | dfs.blocksize 71 | 134217728 72 | 73 | 74 | 75 | dfs.namenode.shared.edits.dir 76 | qjournal://node01:8485;node02:8485;node03:8485/ns1 77 | 78 | 79 | 80 | dfs.journalnode.edits.dir 81 | /hadoopadmin/datadir/hadoop/journal 82 | 83 | 84 | 85 | dfs.ha.automatic-failover.enabled 86 | true 87 | 88 | 89 | 90 | dfs.client.failover.proxy.provider.ns1 91 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 92 | 93 | 94 | 95 | dfs.ha.fencing.methods 96 | 97 | sshfence 98 | shell(/bin/true) 99 | 100 | 101 | 102 | 103 | dfs.ha.fencing.ssh.private-key-files 104 | /home/hadoopadmin/.ssh/id_rsa 105 | 106 | 107 | 108 | dfs.ha.fencing.ssh.connect-timeout 109 | 30000 110 | 111 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/config/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | mapreduce.framework.name 5 | yarn 6 | 7 | 8 | mapreduce.job.ubertask.enable 9 | true 10 | 11 | 12 | mapreduce.jobhistory.address 13 | node01:10020 14 | 15 | 16 | mapreduce.jobhistory.webapp.address 17 | node01:19888 18 | 19 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/config/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | yarn.resourcemanager.ha.enabled 5 | true 6 | 7 | 8 | 9 | yarn.resourcemanager.cluster-id 10 | cluster1 11 | 12 | 13 | 14 | yarn.resourcemanager.ha.rm-ids 15 | rm1,rm2 16 | 17 | 18 | 19 | yarn.resourcemanager.hostname.rm1 20 | node01 21 | 22 | 23 | yarn.resourcemanager.hostname.rm2 24 | node02 25 | 26 | 27 | yarn.resourcemanager.recovery.enabled 28 | true 29 | 30 | 31 | yarn.resourcemanager.store.class 32 | org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore 33 | 34 | 35 | 36 | yarn.resourcemanager.zk-address 37 | node01:2181,node02:2181,node03:2181 38 | 39 | 40 | yarn.nodemanager.aux-services 41 | mapreduce_shuffle 42 | 43 | 44 | yarn.log-aggregation-enable 45 | true 46 | 47 | 48 | yarn.log.server.url 49 | http://node01:19888/jobhistory/logs 50 | 51 | 52 | 53 | yarn.log-aggregation.retain-seconds 54 | 2592000 55 | 56 | 57 | 58 | 59 | yarn.nodemanager.log.retain-seconds 60 | 604800 61 | 62 | 63 | 64 | 65 | yarn.nodemanager.log-aggregation.compression-type 66 | gz 67 | 68 | 69 | 70 | yarn.nodemanager.local-dirs 71 | /hadoopadmin/datadir/hadoop/yarn/local 72 | 73 | 74 | 75 | 
yarn.resourcemanager.max-completed-applications 76 | 1000 77 | 78 | 106 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/hdfs/HDFSClient.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.hdfs; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.commons.compress.utils.IOUtils; 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.fs.*; 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | import java.io.FileInputStream; 11 | import java.io.FileOutputStream; 12 | import java.io.IOException; 13 | import java.net.URI; 14 | import java.util.Iterator; 15 | import java.util.Map.Entry; 16 | import java.util.stream.Stream; 17 | 18 | @Slf4j 19 | public class HDFSClient { 20 | 21 | private FileSystem fileSystem = null; 22 | private Configuration configuration = null; 23 | 24 | private static final String DFS_PATH = "/hadoop/yarn-site.xml"; 25 | private static final String LOCAL_PATH = "src/main/java/com/tools/hadoop/config/yarn-site.xml"; 26 | 27 | @Before 28 | public void init() throws Exception { 29 | configuration = new Configuration(); 30 | 31 | // ******注意:以上配置都是由由就近原则进行配置调用****** 32 | // configuration.set > 自定义配置文件 > jar中配置文件 > 服务端配置 33 | fileSystem = FileSystem.get(new URI("hdfs://spark02:9000"), configuration, "hadoopadmin"); 34 | // 问题:配置HA,为什么在链接standby的namenode时会报错。 35 | 36 | } 37 | 38 | //流方式下载 39 | /** 40 | * upload file by stream 41 | */ 42 | @Test 43 | public void uploadByIO() throws IllegalArgumentException, IOException { 44 | FSDataOutputStream dataOutputStream = fileSystem.create(new Path(DFS_PATH), true); 45 | FileInputStream fileInputStream = new FileInputStream(LOCAL_PATH); 46 | IOUtils.copy(fileInputStream, dataOutputStream); 47 | } 48 | 49 | /** 50 | * download file by stream 51 | */ 52 | @Test 53 | public void downloadByIO() throws IllegalArgumentException, IOException { 54 | FSDataInputStream open = fileSystem.open(new Path(DFS_PATH)); 55 | FileOutputStream fileOutputStream = new FileOutputStream(LOCAL_PATH); 56 | IOUtils.copy(open, fileOutputStream); 57 | } 58 | 59 | /** 60 | * 通过流的方式将文件打印到屏幕上 61 | */ 62 | @Test 63 | public void downloadByIOToDisplay() throws IllegalArgumentException, IOException { 64 | FSDataInputStream open = fileSystem.open(new Path(DFS_PATH)); 65 | IOUtils.copy(open, System.out); 66 | } 67 | 68 | 69 | // client方式 70 | /** 71 | * download file 72 | */ 73 | @Test 74 | public void downloadCommand() throws IllegalArgumentException, IOException { 75 | //在文件拷贝与上传的过程中路径要精确到文件名 76 | fileSystem.copyToLocalFile(new Path(DFS_PATH), new Path(LOCAL_PATH)); 77 | fileSystem.close(); 78 | } 79 | 80 | 81 | /** 82 | * upload file 83 | */ 84 | @Test 85 | public void uploadCommand() { 86 | try { 87 | // hdfs路径要精确到文件名。 88 | fileSystem.copyFromLocalFile(new Path(LOCAL_PATH), new Path(DFS_PATH)); 89 | fileSystem.close(); 90 | } catch (IOException e) { 91 | log.error("upload file to hdfs failed :{}", e.getMessage()); 92 | } 93 | } 94 | 95 | /** 96 | * get cluster config 97 | */ 98 | @Test 99 | public void getConfiguration() { 100 | Iterator> iterator = configuration.iterator(); 101 | while (iterator.hasNext()) { 102 | System.out.println("name:" + iterator.next().getKey() + " ---- value:" + iterator.next().getValue()); 103 | } 104 | try { 105 | fileSystem.close(); 106 | } catch (IOException e) { 107 | log.error("fileSystem close error :{}", e.getMessage()); 108 | } 109 | } 110 | 111 | /** 112 | * mkdir 
on hdfs 113 | */ 114 | @Test 115 | public void mkdirOnHDFS() { 116 | try { 117 | Boolean mkdirRes = fileSystem.mkdirs(new Path(DFS_PATH)); 118 | fileSystem.close(); 119 | System.out.println(mkdirRes); 120 | } catch (IOException e) { 121 | log.error("make directory on hdfs failed :{}", e.getMessage()); 122 | } 123 | /** 124 | * output: 125 | * true/false 126 | */ 127 | } 128 | 129 | 130 | /** 131 | * delete file or directory 132 | */ 133 | @Test 134 | public void deleteFromHDFS() { 135 | try { 136 | System.out.println(fileSystem.delete(new Path(DFS_PATH), true)); 137 | fileSystem.close(); 138 | } catch (IOException e) { 139 | log.error("delete file or directory failed :{}", e.getMessage()); 140 | } 141 | /** 142 | * output: 143 | * true/false 144 | */ 145 | } 146 | 147 | 148 | /** 149 | * 150 | * get all file or dir at path 151 | */ 152 | @Test 153 | public void listFileFromHDFS(){ 154 | FileStatus[] fs = new FileStatus[0]; 155 | try { 156 | fs = fileSystem.listStatus(new Path("/hadoop")); 157 | } catch (IOException e) { 158 | log.error("get all file or dir error :{}",e.getMessage()); 159 | } 160 | Path[] listPath = FileUtil.stat2Paths(fs); 161 | Stream.of(listPath).forEach(path -> System.out.println(path.getName())); 162 | /** 163 | * output: 164 | * core-site.xml 165 | * hdfs-site.xml 166 | * */ 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/hdfs/HDFSStream.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.hdfs; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.commons.compress.utils.IOUtils; 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.fs.FSDataInputStream; 7 | import org.apache.hadoop.fs.FSDataOutputStream; 8 | import org.apache.hadoop.fs.FileSystem; 9 | import org.apache.hadoop.fs.Path; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import java.io.FileInputStream; 14 | import java.io.FileOutputStream; 15 | import java.io.IOException; 16 | import java.net.URI; 17 | 18 | @Slf4j 19 | public class HDFSStream { 20 | 21 | private FileSystem fileSystem = null; 22 | 23 | private Configuration configuration = null; 24 | 25 | private static final String DFS_PATH = "/hadoop/yarn-site.xml"; 26 | private static final String LOCAL_PATH = "src/main/java/com/tools/hadoop/config/yarn-site.xml"; 27 | 28 | @Before 29 | public void init() throws Exception { 30 | configuration = new Configuration(); 31 | fileSystem = FileSystem.get(new URI("hdfs://spark02:9000"), configuration, "hadoopadmin"); 32 | } 33 | 34 | 35 | /** 36 | * upload file by stream 37 | */ 38 | @Test 39 | public void uploadByIO() throws IllegalArgumentException, IOException { 40 | FSDataOutputStream dataOutputStream = fileSystem.create(new Path(DFS_PATH), true); 41 | FileInputStream fileInputStream = new FileInputStream(LOCAL_PATH); 42 | IOUtils.copy(fileInputStream, dataOutputStream); 43 | } 44 | 45 | /** 46 | * download file by stream 47 | */ 48 | @Test 49 | public void downloadByIO() throws IllegalArgumentException, IOException { 50 | FSDataInputStream open = fileSystem.open(new Path(DFS_PATH)); 51 | FileOutputStream fileOutputStream = new FileOutputStream(LOCAL_PATH); 52 | IOUtils.copy(open, fileOutputStream); 53 | } 54 | 55 | /** 56 | * 通过流的方式将文件打印到屏幕上 57 | */ 58 | @Test 59 | public void downloadByIOToDisplay() throws IllegalArgumentException, IOException { 60 | FSDataInputStream open = fileSystem.open(new 
Path(DFS_PATH)); 61 | IOUtils.copy(open, System.out); 62 | } 63 | 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/hdfs/UploadFileToHdfsByCompress.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.hdfs; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.hadoop.conf.Configuration; 5 | import org.apache.hadoop.fs.FSDataOutputStream; 6 | import org.apache.hadoop.fs.FileSystem; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.IOUtils; 9 | import org.apache.hadoop.io.compress.BZip2Codec; 10 | import org.apache.hadoop.io.compress.CompressionOutputStream; 11 | import org.junit.Test; 12 | 13 | import java.io.BufferedInputStream; 14 | import java.io.FileInputStream; 15 | import java.io.IOException; 16 | import java.net.URI; 17 | 18 | @Slf4j 19 | public class UploadFileToHdfsByCompress { 20 | 21 | private static final String DFS_PATH = "/hadoop/yarn-site.xml"; 22 | private static final String LOCAL_PATH = "src/main/java/com/tools/hadoop/config/yarn-site.xml"; 23 | private static final String url = "hdfs://spark02:9000"; 24 | 25 | @Test 26 | public void uploadByCompress() { 27 | Configuration configuration = new Configuration(); 28 | BZip2Codec codec = new BZip2Codec(); 29 | codec.setConf(configuration); 30 | try { 31 | BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(LOCAL_PATH)); 32 | FileSystem fileSystem = FileSystem.get(URI.create(url), configuration, "hadoopadmin"); 33 | FSDataOutputStream outputStream = fileSystem.create(new Path(DFS_PATH)); 34 | 35 | CompressionOutputStream codecOutputStream = codec.createOutputStream(outputStream); 36 | IOUtils.copyBytes(inputStream, codecOutputStream, configuration); 37 | log.info("upload success, local path: {}, hdfs path: {}", LOCAL_PATH, DFS_PATH); 38 | } catch (InterruptedException | IOException e) { 39 | log.error("upload error:{}", e.getMessage()); 40 | } 41 | 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/HFileGenerator.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr; 2 | 3 | import com.tools.hbase.Utils; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.fs.FileSystem; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.hbase.TableName; 9 | import org.apache.hadoop.hbase.client.Connection; 10 | import org.apache.hadoop.hbase.client.ConnectionFactory; 11 | import org.apache.hadoop.hbase.client.Table; 12 | import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; 13 | import org.apache.hadoop.mapred.FileInputFormat; 14 | import org.apache.hadoop.mapred.FileOutputFormat; 15 | import org.apache.hadoop.mapred.JobConf; 16 | import org.apache.hadoop.mapreduce.Job; 17 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 18 | 19 | import java.io.IOException; 20 | import java.net.URI; 21 | import java.util.Objects; 22 | import java.util.UUID; 23 | 24 | /** 25 | * bulk load like a ETL 26 | * - Extract: from text file or another database into HDFS 27 | * - Transform: data into HFile(HBase's own file format) 28 | * - Load: load the HFile into HBase and tell region server where to find them 29 | */ 30 | 31 | @Slf4j 32 | public class HFileGenerator { 33 | 34 | public static void main(String[] args) { 35 | if 
(args.length < 4) { 36 | System.err.println("Usage:hadoop jar HFileGenerator.jar inputPath outputPath tableName configPath"); 37 | System.exit(0); 38 | } 39 | Job job = createJob(args[0], args[1], args[2], args[3]); 40 | if (Objects.isNull(job)) { 41 | log.error("error in create job!"); 42 | } 43 | try { 44 | if (job.waitForCompletion(true)) { 45 | log.info("execute job finish!"); 46 | Utils.doBulkLoad(job.getConfiguration(), args[1], args[2]); 47 | } else { 48 | log.error("execute job failed!!"); 49 | } 50 | } catch (IOException | InterruptedException | ClassNotFoundException e) { 51 | e.printStackTrace(); 52 | } 53 | 54 | } 55 | 56 | public static Job createJob(String inputPath, String outputPath, String tableName, String configPath) { 57 | Configuration configuration = new Configuration(); 58 | configuration.addResource(new Path(configPath)); 59 | configuration.set("hbase.fs.tmp.dir", "partition_" + UUID.randomUUID()); 60 | Job job = null; 61 | try { 62 | try { 63 | FileSystem fileSystem = FileSystem.get(URI.create(outputPath), configuration); 64 | fileSystem.delete(new Path(outputPath), true); 65 | fileSystem.close(); 66 | } catch (IOException e) { 67 | e.printStackTrace(); 68 | } 69 | Connection connection = ConnectionFactory.createConnection(configuration); 70 | Table table = connection.getTable(TableName.valueOf(tableName)); 71 | job = Job.getInstance(configuration); 72 | job.setJobName("HFileGenerator Job"); 73 | 74 | job.setJarByClass(HFileGenerator.class); 75 | job.setOutputFormatClass(TextOutputFormat.class); 76 | job.setMapperClass(HFileImportMapper.class); 77 | FileInputFormat.setInputPaths(new JobConf(configuration), inputPath); 78 | FileOutputFormat.setOutputPath(new JobConf(configuration), new Path(outputPath)); 79 | 80 | HFileOutputFormat2.configureIncrementalLoad(job, table, connection.getRegionLocator(TableName.valueOf(tableName))); 81 | } catch (Exception e) { 82 | 83 | } 84 | return job; 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/HFileImportMapper.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.hadoop.hbase.KeyValue; 5 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 6 | import org.apache.hadoop.hbase.util.Bytes; 7 | import org.apache.hadoop.io.LongWritable; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Mapper; 10 | 11 | import java.io.IOException; 12 | import java.time.LocalDate; 13 | import java.time.format.DateTimeFormatter; 14 | 15 | @Slf4j 16 | public class HFileImportMapper extends Mapper { 17 | 18 | protected final String CF_KQ = "cf"; 19 | 20 | @Override 21 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 22 | String line = value.toString(); 23 | log.info("read line: {}", line); 24 | String[] strings = line.split(" "); 25 | String row = LocalDate.now().format(DateTimeFormatter.BASIC_ISO_DATE) + "_" + strings[1]; 26 | ImmutableBytesWritable writable = new ImmutableBytesWritable(Bytes.toBytes(row)); 27 | KeyValue keyValue = new KeyValue(Bytes.toBytes(row), this.CF_KQ.getBytes(), strings[1].getBytes(), strings[2].getBytes()); 28 | context.write(writable, keyValue); 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/dataclean/DataClean.java: 
-------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.dataclean; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.NullWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 9 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 10 | import org.apache.hadoop.mapreduce.Counter; 11 | import org.apache.hadoop.mapreduce.Job; 12 | import org.apache.hadoop.mapreduce.Mapper; 13 | 14 | import java.io.IOException; 15 | 16 | public class DataClean { 17 | /** 18 | * 19 | * 注意:若要IDEA中,本地运行MR程序,需要将resources/mapred-site.xml中的mapreduce.framework.name属性值,设置成local 20 | * @param args 21 | */ 22 | public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 23 | 24 | //判断一下,输入参数是否是两个,分别表示输入路径、输出路径 25 | if (args == null || args.length != 2) { 26 | System.out.println("please input Path!"); 27 | System.exit(0); 28 | } 29 | 30 | Configuration configuration = new Configuration(); 31 | 32 | //调用getInstance方法,生成job实例 33 | Job job = Job.getInstance(configuration, DataClean.class.getSimpleName()); 34 | 35 | //设置jar包,参数是包含main方法的类 36 | job.setJarByClass(DataClean.class); 37 | 38 | //设置输入/输出路径 39 | FileInputFormat.setInputPaths(job, new Path(args[0])); 40 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 41 | 42 | //设置处理Map阶段的自定义的类 43 | job.setMapperClass(DataCleanMapper.class); 44 | 45 | //注意:此处设置的map输出的key/value类型,一定要与自定义map类输出的kv对类型一致;否则程序运行报错 46 | job.setMapOutputKeyClass(Text.class); 47 | job.setMapOutputValueClass(NullWritable.class); 48 | 49 | //注意:因为不需要reduce聚合阶段,所以,需要显示设置reduce task个数是0 50 | job.setNumReduceTasks(0); 51 | 52 | // 提交作业 53 | System.exit(job.waitForCompletion(true) ? 
0 : 1); 54 | } 55 | 56 | /** 57 | * 58 | * 自定义mapper类 59 | * 注意:若自定义的mapper类,与main方法在同一个类中,需要将自定义mapper类,声明成static的 60 | */ 61 | public static class DataCleanMapper extends Mapper { 62 | NullWritable nullValue = NullWritable.get(); 63 | 64 | @Override 65 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 66 | //自定义计数器,用于记录残缺记录数 67 | Counter counter = context.getCounter("DataCleaning", "damagedRecord"); 68 | 69 | //获得当前行数据 70 | //样例数据:20111230111645 169796ae819ae8b32668662bb99b6c2d 塘承高速公路规划线路图 1 1 http://auto.ifeng.com/roll/20111212/729164.shtml 71 | String line = value.toString(); 72 | 73 | String[] fields = line.split("\t"); 74 | 75 | if(fields.length != 6) { 76 | //若不是,则不输出,并递增自定义计数器 77 | counter.increment(1L); 78 | } else { 79 | //若是6,则原样输出 80 | context.write(value, nullValue); 81 | } 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/duplicate/DuplicateRemoval.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.duplicate; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.Job; 8 | import org.apache.hadoop.mapreduce.Mapper; 9 | import org.apache.hadoop.mapreduce.Reducer; 10 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 11 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import java.io.IOException; 16 | 17 | /** 18 | * Title:MR 19 | * description: 20 | * 21 | * @author: Lijiaqi 22 | * @version: 1.0 23 | * @create: 2018-10-29 16:20 24 | **/ 25 | 26 | public class DuplicateRemoval { 27 | private final static Logger logger = LoggerFactory.getLogger(DuplicateRemoval.class); 28 | 29 | /** 30 | * map将输入中的value复制到输出数据的key上,并直接输出 31 | */ 32 | public static class Map extends Mapper { 33 | 34 | /** 35 | * 实现map函数 36 | */ 37 | @Override 38 | public void map(Object key, Text value, Context context) 39 | 40 | throws IOException, InterruptedException { 41 | context.write(value, new Text("")); 42 | } 43 | } 44 | 45 | /** 46 | * reduce将输入中的key复制到输出数据的key上,并直接输出 47 | */ 48 | public static class Reduce extends Reducer { 49 | /** 50 | * 实现reduce函数 51 | */ 52 | @Override 53 | public void reduce(Text key, Iterable values, Context context) 54 | throws IOException, InterruptedException { 55 | context.write(key, new Text("")); 56 | } 57 | } 58 | 59 | /** 60 | * 入口方法 61 | */ 62 | public static void main(String[] args) throws Exception { 63 | 64 | //默认不做任何配置,都由配置文件中加载 65 | Configuration conf = new Configuration(); 66 | /** 67 | * 默认入口携带第一个参数为输入路径,第二个为输出路径 68 | */ 69 | Path path = new Path(args[1]); 70 | 71 | //从配置文件中获取当前文件系统,判断是os还是hdfs 72 | FileSystem fs = FileSystem.get(conf); 73 | 74 | if (fs.exists(path)) { 75 | logger.error("Usage: Data Deduplication "); 76 | //当输出文件已经存在时,进行删除 77 | fs.delete(path, true); 78 | System.exit(2); 79 | } 80 | 81 | //Job job = new Job(conf);//已经不推荐使用 82 | //推荐使用当前方式 83 | Job job = Job.getInstance(conf, "DuplicateRemoval"); 84 | job.setJarByClass(DuplicateRemoval.class); 85 | 86 | //设置输入和输出目录 87 | FileInputFormat.setInputPaths(job, new Path(args[0])); 88 | FileOutputFormat.setOutputPath(job, path); 89 | 90 | //设置Map处理类 91 | job.setMapperClass(Map.class); 92 | job.setMapOutputKeyClass(Text.class); 93 | 
job.setMapOutputValueClass(Text.class); 94 | 95 | //设置Combine和Reduce处理类 96 | job.setCombinerClass(Reduce.class); 97 | job.setReducerClass(Reduce.class); 98 | 99 | //设置输出类型 100 | job.setOutputKeyClass(Text.class); 101 | job.setOutputValueClass(Text.class); 102 | 103 | System.exit(job.waitForCompletion(true) ? 0 : 1); 104 | 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/inputformat/SmallFiles2SequenceFile.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.inputformat; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.conf.Configured; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.BytesWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; 10 | import org.apache.hadoop.util.Tool; 11 | import org.apache.hadoop.util.ToolRunner; 12 | 13 | public class SmallFiles2SequenceFile extends Configured implements Tool { 14 | 15 | public static void main(String[] args) throws Exception { 16 | int code = ToolRunner.run(new SmallFiles2SequenceFile(), args); 17 | System.exit(code); 18 | } 19 | 20 | @Override 21 | public int run(String[] strings) throws Exception { 22 | Configuration configuration = new Configuration(); 23 | 24 | configuration.set("mapreduce.map.output.compress", "true"); 25 | //设置map输出的压缩算法是:BZip2Codec,它是hadoop默认支持的压缩算法,且支持切分 26 | configuration.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); 27 | //开启job输出压缩功能 28 | configuration.set("mapreduce.output.fileoutputformat.compress", "true"); 29 | //指定job输出使用的压缩算法 30 | configuration.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); 31 | 32 | Job job = Job.getInstance(configuration, SmallFiles2SequenceFile.class.getName()); 33 | 34 | job.setJarByClass(SmallFiles2SequenceFile.class); 35 | 36 | job.setMapperClass(SmallFiles2SequenceMapper.class); 37 | 38 | job.setInputFormatClass(WholeFileInputFormat.class); 39 | 40 | WholeFileInputFormat.addInputPath(job, new Path(strings[0])); 41 | 42 | job.setOutputFormatClass(SequenceFileOutputFormat.class); 43 | 44 | SequenceFileOutputFormat.setOutputPath(job, new Path(strings[1])); 45 | 46 | job.setOutputKeyClass(Text.class); 47 | job.setOutputValueClass(BytesWritable.class); 48 | 49 | return job.waitForCompletion(true) ? 
0 : 1; 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/inputformat/SmallFiles2SequenceMapper.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.inputformat; 2 | 3 | import org.apache.hadoop.fs.Path; 4 | import org.apache.hadoop.io.BytesWritable; 5 | import org.apache.hadoop.io.NullWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.apache.hadoop.mapreduce.InputSplit; 8 | import org.apache.hadoop.mapreduce.Mapper; 9 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 10 | 11 | import java.io.IOException; 12 | 13 | public class SmallFiles2SequenceMapper extends Mapper { 14 | 15 | private Text filenameKey; 16 | 17 | @Override 18 | protected void setup(Context context) { 19 | InputSplit inputSplit = context.getInputSplit(); 20 | Path path = ((FileSplit) inputSplit).getPath(); 21 | filenameKey = new Text(path.toString()); 22 | } 23 | 24 | @Override 25 | protected void map(NullWritable key, BytesWritable value, Context context) throws IOException, InterruptedException { 26 | context.write(new Text(filenameKey), value); 27 | } 28 | 29 | @Override 30 | protected void cleanup(Context context) throws IOException, InterruptedException { 31 | super.cleanup(context); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/inputformat/WholeFileInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.inputformat; 2 | 3 | import org.apache.hadoop.fs.Path; 4 | import org.apache.hadoop.mapreduce.InputSplit; 5 | import org.apache.hadoop.mapreduce.JobContext; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 9 | 10 | 11 | /** 12 | * 自定义inputformat实现先文件合并 13 | */ 14 | public class WholeFileInputFormat extends FileInputFormat { 15 | @Override 16 | protected boolean isSplitable(JobContext context, Path filename) { 17 | return false; 18 | } 19 | 20 | @Override 21 | public RecordReader createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) { 22 | WholeRecordReader wholeRecordReader = new WholeRecordReader(); 23 | wholeRecordReader.initialize(inputSplit, taskAttemptContext); 24 | return wholeRecordReader; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/inputformat/WholeRecordReader.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.inputformat; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.FSDataInputStream; 5 | import org.apache.hadoop.fs.FileSystem; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.io.BytesWritable; 8 | import org.apache.hadoop.io.IOUtils; 9 | import org.apache.hadoop.io.NullWritable; 10 | import org.apache.hadoop.mapreduce.InputSplit; 11 | import org.apache.hadoop.mapreduce.RecordReader; 12 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 13 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 14 | 15 | import java.io.IOException; 16 | 17 | public class WholeRecordReader extends RecordReader { 18 | 19 | private FileSplit fileSplit; 20 | 21 | private Configuration configuration; 22 | 23 | private 
BytesWritable value = new BytesWritable(); 24 | 25 | private boolean processed = false; 26 | 27 | 28 | @Override 29 | public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) { 30 | this.fileSplit = (FileSplit) inputSplit; 31 | this.configuration = taskAttemptContext.getConfiguration(); 32 | } 33 | 34 | @Override 35 | public boolean nextKeyValue() throws IOException, InterruptedException { 36 | if (!processed) { 37 | byte[] bytes = new byte[(int) fileSplit.getLength()]; 38 | Path path = fileSplit.getPath(); 39 | FileSystem fileSystem = path.getFileSystem(configuration); 40 | FSDataInputStream open = fileSystem.open(path); 41 | IOUtils.readFully(open, bytes, 0, bytes.length); 42 | value.set(bytes, 0, bytes.length); 43 | IOUtils.closeStream(open); 44 | processed = true; 45 | return true; 46 | } 47 | return false; 48 | } 49 | 50 | @Override 51 | public NullWritable getCurrentKey() throws IOException, InterruptedException { 52 | return NullWritable.get(); 53 | } 54 | 55 | @Override 56 | public BytesWritable getCurrentValue() throws IOException, InterruptedException { 57 | return value; 58 | } 59 | 60 | @Override 61 | public float getProgress() throws IOException, InterruptedException { 62 | return processed ? 1.0f : 0.0f; 63 | } 64 | 65 | @Override 66 | public void close() throws IOException { 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/outputformat/MyFileOutputFormat.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.outputformat; 2 | 3 | import org.apache.hadoop.fs.FSDataOutputStream; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.NullWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.RecordWriter; 9 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 10 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 | 12 | import java.io.IOException; 13 | 14 | public class MyFileOutputFormat extends FileOutputFormat { 15 | //与reduce的输出泛型一致 16 | 17 | 18 | @Override 19 | public RecordWriter getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { 20 | FileSystem fileSystem = FileSystem.get(taskAttemptContext.getConfiguration()); 21 | 22 | String bad = "hdfs://spark01:8020/outputformat/good/r.txt"; 23 | Path badPath = new Path(bad); 24 | String good = "hdfs://spark01:8020/outputformat/bad/r.txt"; 25 | Path goodPath = new Path(good); 26 | FSDataOutputStream badStream = fileSystem.create(badPath); 27 | FSDataOutputStream goodStream = fileSystem.create(goodPath); 28 | return new MyRecordWriter(badStream, goodStream); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/outputformat/MyFileOutputFormatMain.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.outputformat; 2 | 3 | 4 | import org.apache.hadoop.conf.Configuration; 5 | import org.apache.hadoop.conf.Configured; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.io.NullWritable; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Job; 10 | import org.apache.hadoop.mapreduce.Mapper; 11 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 12 | import org.apache.hadoop.util.Tool; 13 | import 
org.apache.hadoop.util.ToolRunner; 14 | 15 | import java.io.IOException; 16 | 17 | public class MyFileOutputFormatMain extends Configured implements Tool { 18 | @Override 19 | public int run(String[] strings) throws Exception { 20 | 21 | Configuration configuration = new Configuration(); 22 | 23 | Job job = Job.getInstance(configuration, MyFileOutputFormatMain.class.getName()); 24 | 25 | 26 | job.setJarByClass(MyFileOutputFormatMain.class); 27 | 28 | 29 | TextInputFormat.addInputPath(job, new Path(strings[0])); 30 | MyFileOutputFormat.setOutputPath(job, new Path(strings[1])); 31 | job.setMapperClass(MyFileOutputFormatMapper.class); 32 | job.setOutputFormatClass(MyFileOutputFormat.class); 33 | 34 | job.setOutputKeyClass(Text.class); 35 | job.setOutputValueClass(NullWritable.class); 36 | 37 | return job.waitForCompletion(true) ? 0 : 1; 38 | 39 | 40 | } 41 | 42 | public static void main(String[] args) throws Exception { 43 | ToolRunner.run(new MyFileOutputFormatMain(), args); 44 | } 45 | 46 | private static class MyFileOutputFormatMapper extends Mapper { 47 | @Override 48 | protected void map(Object key, Object value, Context context) throws IOException, InterruptedException { 49 | context.write(value, NullWritable.get()); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/outputformat/MyRecordWriter.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.outputformat; 2 | 3 | import org.apache.hadoop.fs.FSDataOutputStream; 4 | import org.apache.hadoop.io.NullWritable; 5 | import org.apache.hadoop.io.Text; 6 | import org.apache.hadoop.mapreduce.RecordWriter; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | 9 | import java.io.IOException; 10 | 11 | public class MyRecordWriter extends RecordWriter { 12 | FSDataOutputStream badOut; 13 | FSDataOutputStream goodOut; 14 | 15 | public MyRecordWriter(FSDataOutputStream badOut, FSDataOutputStream goodOut) { 16 | this.badOut = badOut; 17 | this.goodOut = goodOut; 18 | } 19 | 20 | @Override 21 | public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException { 22 | if (text.toString().split("\t")[9].equals("0")) { 23 | goodOut.write(text.toString().getBytes()); 24 | goodOut.write("\r\n".getBytes()); 25 | } else { 26 | badOut.write(text.toString().getBytes()); 27 | badOut.write("\r\n".getBytes()); 28 | } 29 | } 30 | 31 | @Override 32 | public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { 33 | if (goodOut != null) { 34 | goodOut.close(); 35 | } 36 | if (badOut != null) { 37 | badOut.close(); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/secondarysort/MyOrder.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.secondarysort; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.conf.Configured; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.DoubleWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 10 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 | import org.apache.hadoop.util.Tool; 12 | import org.apache.hadoop.util.ToolRunner; 13 | 14 | /** 15 | * 16 | */ 17 | public class 
MyOrder extends Configured implements Tool { 18 | 19 | 20 | @Override 21 | public int run(String[] strings) throws Exception { 22 | 23 | Configuration configuration = new Configuration(); 24 | 25 | Job job = Job.getInstance(configuration, MyOrder.class.getName()); 26 | 27 | FileInputFormat.setInputPaths(job, new Path(strings[0])); 28 | FileOutputFormat.setOutputPath(job, new Path(strings[1])); 29 | 30 | job.setJarByClass(MyOrder.class); 31 | 32 | job.setMapperClass(MyOrderMapper.class); 33 | job.setReducerClass(MyOrderReducer.class); 34 | 35 | job.setGroupingComparatorClass(MyOrderGroup.class); 36 | 37 | //如果map、reduce的输出的kv对类型一致,直接设置reduce的输出的kv对就行;如果不一样,需要分别设置map, reduce的输出的kv类型 38 | //注意:此处设置的map输出的key/value类型,一定要与自定义map类输出的kv对类型一致;否则程序运行报错 39 | job.setMapOutputKeyClass(OrderBean.class); 40 | job.setMapOutputValueClass(DoubleWritable.class); 41 | 42 | //设置reduce task最终输出key/value的类型 43 | //注意:此处设置的reduce输出的key/value类型,一定要与自定义reduce类输出的kv对类型一致;否则程序运行报错 44 | job.setOutputKeyClass(Text.class); 45 | job.setOutputValueClass(DoubleWritable.class); 46 | 47 | return job.waitForCompletion(true)?0:1; 48 | } 49 | 50 | public static void main(String[] args) throws Exception { 51 | System.exit(ToolRunner.run(new MyOrder(), args)); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/secondarysort/MyOrderGroup.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.secondarysort; 2 | 3 | import org.apache.hadoop.io.WritableComparable; 4 | import org.apache.hadoop.io.WritableComparator; 5 | 6 | public class MyOrderGroup extends WritableComparator { 7 | 8 | public MyOrderGroup(){ 9 | // 标识当前的key为orderbean 10 | super(OrderBean.class,true); 11 | } 12 | 13 | @Override 14 | public int compare(WritableComparable a, WritableComparable b) { 15 | OrderBean aOrderBean = (OrderBean)a; 16 | OrderBean bOrderBean = (OrderBean)b; 17 | 18 | String aUserId = aOrderBean.getUserid(); 19 | String bUserId = bOrderBean.getUserid(); 20 | //userid、年、月相同的,作为一组 21 | int ret1 = aUserId.compareTo(bUserId); 22 | if(ret1 == 0) {//同一用户 23 | //年月也相同返回0,在同一组; 24 | return aOrderBean.getDatetime().compareTo(bOrderBean.getDatetime()); 25 | } else { 26 | return ret1; 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/secondarysort/MyOrderMapper.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.secondarysort; 2 | 3 | import org.apache.hadoop.io.DoubleWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.io.Text; 6 | import org.apache.hadoop.mapreduce.Mapper; 7 | 8 | import java.io.IOException; 9 | import java.time.LocalDateTime; 10 | import java.time.format.DateTimeFormatter; 11 | import java.util.Optional; 12 | 13 | import static java.time.LocalDateTime.parse; 14 | 15 | 16 | /** 17 | * 泛型描述

输入key 输入value 输出key 输出value 18 | */ 19 | public class MyOrderMapper extends Mapper { 20 | 21 | @Override 22 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 23 | 24 | String[] values = Optional.ofNullable(value).map(v -> v.toString().split("\t")).get(); 25 | String yearMonthString; 26 | try { 27 | yearMonthString = getYearMonthString(values[1], "yyyy-MM-dd HH:mm:ss.SSS"); 28 | }catch (Exception e){ 29 | return; 30 | } 31 | if (values.length == 6){ 32 | //13764633023 2014-12-01 02:20:42.000 全视目Allseelook 原宿风暴显色美瞳彩色隐形艺术眼镜1片 拍2包邮 33.6 2 18067781305 33 | OrderBean orderBean = new OrderBean(values[0], 34 | yearMonthString, 35 | values[2], 36 | Double.parseDouble(values[3]), 37 | Integer.parseInt(values[4]), 38 | values[5]); 39 | 40 | DoubleWritable doubleWritable = new DoubleWritable(); 41 | doubleWritable.set(Double.parseDouble(values[3])*Integer.parseInt(values[4])); 42 | context.write(orderBean, doubleWritable); 43 | } 44 | } 45 | 46 | public static String getYearMonthString(String dateTime, String pattern) { 47 | DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern); 48 | LocalDateTime localDateTime = parse(dateTime, formatter); 49 | int year = localDateTime.getYear(); 50 | int month = localDateTime.getMonthValue(); 51 | return year + "" + month; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/secondarysort/MyOrderReducer.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.secondarysort; 2 | 3 | import org.apache.hadoop.io.DoubleWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.apache.hadoop.mapreduce.Reducer; 6 | 7 | import java.io.IOException; 8 | 9 | public class MyOrderReducer extends Reducer { 10 | 11 | @Override 12 | protected void reduce(OrderBean key, Iterable values, Context context) throws IOException, InterruptedException { 13 | int num = 0; 14 | for(DoubleWritable value: values) { 15 | if(num < 2) { 16 | String keyOut = key.getUserid() + "-----" + key.getDatetime(); 17 | context.write(new Text(keyOut), value); 18 | num++; 19 | } else { 20 | break; 21 | } 22 | } 23 | 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/secondarysort/MyPartitioner.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.secondarysort; 2 | 3 | import org.apache.hadoop.io.DoubleWritable; 4 | import org.apache.hadoop.mapreduce.Partitioner; 5 | 6 | //mapper的输出key类型是自定义的key类型OrderBean;输出value类型是单笔订单的总开销double -> DoubleWritable 7 | public class MyPartitioner extends Partitioner { 8 | @Override 9 | public int getPartition(OrderBean orderBean, DoubleWritable doubleWritable, int numReduceTasks) { 10 | //userid相同的,落入同一分区 11 | return (orderBean.getUserid().hashCode() & Integer.MAX_VALUE) % numReduceTasks; 12 | } 13 | } -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/secondarysort/OrderBean.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.secondarysort; 2 | 3 | import lombok.Data; 4 | import org.apache.hadoop.io.WritableComparable; 5 | 6 | import java.io.DataInput; 7 | import java.io.DataOutput; 8 | import java.io.IOException; 9 | 10 | @Data 11 | public class OrderBean implements 
WritableComparable { 12 | 13 | //用户ID 14 | private String userid; 15 | //年月 16 | //year+month -> 201408 17 | private String datetime; 18 | //标题 19 | private String title; 20 | //单价 21 | private double unitPrice; 22 | //购买量 23 | private int purchaseNum; 24 | //商品ID 25 | private String produceId; 26 | 27 | public OrderBean() { 28 | } 29 | 30 | public OrderBean(String userid, String datetime, String title, double unitPrice, int purchaseNum, String produceId) { 31 | super(); 32 | this.userid = userid; 33 | this.datetime = datetime; 34 | this.title = title; 35 | this.unitPrice = unitPrice; 36 | this.purchaseNum = purchaseNum; 37 | this.produceId = produceId; 38 | } 39 | 40 | //key的比较规则 41 | public int compareTo(OrderBean other) { 42 | //OrderBean作为MR中的key;如果对象中的userid相同,即ret1为0;就表示两个对象是同一个用户 43 | int isEquals = this.userid.compareTo(other.userid); 44 | 45 | if (isEquals == 0) { 46 | //如果userid相同,比较年月 47 | String thisYearMonth = this.getDatetime(); 48 | String otherYearMonth = other.getDatetime(); 49 | int isEqualsWithDate = thisYearMonth.compareTo(otherYearMonth); 50 | 51 | if(isEqualsWithDate == 0) {//若datetime相同 52 | //如果userid、年月都相同,比较单笔订单的总开销 53 | Double thisTotalPrice = this.getPurchaseNum()*this.getUnitPrice(); 54 | Double oTotalPrice = other.getPurchaseNum()*other.getUnitPrice(); 55 | //总花销降序排序;即总花销高的排在前边 56 | return -thisTotalPrice.compareTo(oTotalPrice); 57 | } else { 58 | //若datatime不同,按照datetime升序排序 59 | return isEqualsWithDate; 60 | } 61 | } else { 62 | //按照userid升序排序 63 | return isEquals; 64 | } 65 | } 66 | 67 | /** 68 | * 序列化 69 | * @param dataOutput 70 | * @throws IOException 71 | */ 72 | public void write(DataOutput dataOutput) throws IOException { 73 | dataOutput.writeUTF(userid); 74 | dataOutput.writeUTF(datetime); 75 | dataOutput.writeUTF(title); 76 | dataOutput.writeDouble(unitPrice); 77 | dataOutput.writeInt(purchaseNum); 78 | dataOutput.writeUTF(produceId); 79 | } 80 | 81 | /** 82 | * 反序列化 83 | * @param dataInput 84 | * @throws IOException 85 | */ 86 | public void readFields(DataInput dataInput) throws IOException { 87 | this.userid = dataInput.readUTF(); 88 | this.datetime = dataInput.readUTF(); 89 | this.title = dataInput.readUTF(); 90 | this.unitPrice = dataInput.readDouble(); 91 | this.purchaseNum = dataInput.readInt(); 92 | this.produceId = dataInput.readUTF(); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/wordcount/LogCount.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.wordcount; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 10 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 | 12 | public class LogCount { 13 | 14 | public static void main(String[] args) throws Exception { 15 | 16 | // 读取配置文件 17 | Configuration conf = new Configuration(); 18 | 19 | Path out = new Path(args[1]); 20 | FileSystem fs = FileSystem.get(conf); 21 | 22 | //判断输出路径是否存在,当路径存在时mapreduce会报错 23 | if (fs.exists(out)) { 24 | fs.delete(out, true); 25 | System.out.println("ouput is exit will delete"); 26 | } 27 | 28 | // 创建任务 29 | Job job = Job.getInstance(conf, LogCount.class.getName()); 30 | // 设置job的主类 31 | job.setJarByClass(LogCount.class); 
// 主类 32 | 33 | // 设置作业的输入路径 34 | FileInputFormat.setInputPaths(job, new Path(args[0])); 35 | 36 | //设置map的相关参数 37 | job.setMapperClass(LogCountMapper.class); 38 | 39 | //设置reduce相关参数 40 | job.setReducerClass(LogCountReducer.class); 41 | job.setOutputKeyClass(Text.class); 42 | job.setOutputValueClass(LongWritable.class); 43 | 44 | //设置作业的输出路径 45 | FileOutputFormat.setOutputPath(job, out); 46 | 47 | job.setNumReduceTasks(2); 48 | 49 | 50 | System.exit(job.waitForCompletion(true) ? 0 : 1); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/wordcount/LogCountMapper.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.wordcount; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.apache.hadoop.mapreduce.Mapper; 6 | 7 | import java.io.IOException; 8 | 9 | 10 | public class LogCountMapper extends Mapper { 11 | /** 12 | * 读取输入文件 13 | */ 14 | @Override 15 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 16 | String line = value.toString(); 17 | String[] words = line.split(" "); 18 | for (String word : words) { 19 | //通过上下文将结果输出 20 | context.write(new Text(word), new LongWritable(1L)); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/wordcount/LogCountReducer.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.wordcount; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.apache.hadoop.mapreduce.Reducer; 6 | 7 | import java.io.IOException; 8 | 9 | public class LogCountReducer extends Reducer { 10 | /** 11 | * 12 | */ 13 | @Override 14 | protected void reduce(Text key, Iterable values, Context context) 15 | throws IOException, InterruptedException { 16 | Long count = 0L; 17 | for (LongWritable value : values) { 18 | count += value.get(); 19 | } 20 | //统计结果的输出 21 | context.write(key, new LongWritable(count)); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/wordcount/MyPartitioner.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.wordcount; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.apache.hadoop.mapreduce.Partitioner; 6 | 7 | /** 8 | * 自定义partitioner 9 | */ 10 | public class MyPartitioner extends Partitioner { 11 | 12 | 13 | /** 14 | * 15 | * 重写方法 16 | * 17 | * @param arg0 输入数据 18 | * @param arg1 19 | * @param arg2 20 | * @return 返回值为分区序号 21 | */ 22 | @Override 23 | public int getPartition(Text arg0, LongWritable arg1, int arg2) { 24 | 25 | 26 | if (arg0.toString().equals("hadoop")) { 27 | return 0; 28 | } 29 | if (arg0.toString().equals("spark")) { 30 | return 1; 31 | } 32 | if (arg0.toString().equals("hbase")) { 33 | return 2; 34 | } 35 | return 3; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/wordcount/WordCountByPartitioner.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.wordcount; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import 
org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 10 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 | 12 | public class WordCountByPartitioner { 13 | /** 14 | * @Title: main @Description: 定义的driver:封装了mapreduce作业的所有信息 @param @param 15 | * args @param @throws Exception @return void @throws 16 | */ 17 | public static void main(String[] args) throws Exception { 18 | 19 | // 设置环境变量HADOOP_USER_NAME,其值是root 20 | // 在本机调试 21 | // 读取配置文件 22 | Configuration conf = new Configuration(); 23 | conf.set("fs.defaultFS", "hdfs://spark01:9000"); 24 | conf.set("yarn.resourcemanager.hostname", "spark01"); 25 | 26 | 27 | /** 28 | * MR压缩相关 29 | * 在mr中为了减少磁盘和网络io同时可以开启压缩 30 | */ 31 | //map端压缩 32 | conf.set("mapreduce.map.output.compress", "true"); 33 | conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); 34 | //输出端压缩 35 | conf.set("mapreduce.output.fileoutputformat.compress", "true"); 36 | conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); 37 | 38 | Path out = new Path(args[1]); 39 | FileSystem fs = FileSystem.get(conf); 40 | 41 | //判断输出路径是否存在,当路径存在时mapreduce会报错 42 | if (fs.exists(out)) { 43 | fs.delete(out, true); 44 | System.out.println("ouput is exit will delete"); 45 | } 46 | 47 | // 创建任务 48 | Job job = Job.getInstance(conf, WordCountByPartitioner.class.getName()); 49 | // 设置job的主类 50 | job.setJarByClass(WordCountByPartitioner.class); // 主类 51 | 52 | // 设置作业的输入路径 53 | FileInputFormat.setInputPaths(job, new Path(args[0])); 54 | 55 | //设置map的相关参数 56 | job.setMapperClass(WordCountMapper.class); 57 | 58 | 59 | /** 60 | * 需要注意的事Combiner就是Reducer,他相当于在map端进行的一个reducer,以便于减少网络io 61 | * - 使用combine时,首先考虑当前MR是否适合combine 62 | * - 总原则是不论使不使用combine不能影响最终的结果 63 | * - 在MR时,发生数据倾斜,且可以使用combine时,可以使用combine缓解数据倾斜 64 | */ 65 | job.setCombinerClass(WordCountReducer.class); 66 | 67 | //设置reduce相关参数 68 | job.setReducerClass(WordCountReducer.class); 69 | job.setOutputKeyClass(Text.class); 70 | job.setOutputValueClass(LongWritable.class); 71 | 72 | // 设置自定义partitioner 73 | job.setPartitionerClass(MyPartitioner.class); 74 | 75 | //设置作业的输出路径 76 | FileOutputFormat.setOutputPath(job, out); 77 | 78 | //设置reduce为4 否则partitioner不会生效 79 | job.setNumReduceTasks(4); 80 | 81 | 82 | System.exit(job.waitForCompletion(true) ? 
0 : 1); 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/wordcount/WordCountMapper.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.wordcount; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.apache.hadoop.mapreduce.Mapper; 6 | 7 | import java.io.IOException; 8 | 9 | /** 10 | * @author LiJiaqi 11 | * @ClassName: WordCountMapper 12 | * @Description:使用mapreduce开发wordcount程序 13 | * @date 2018年8月22日 下午11:15:54 14 | */ 15 | public class WordCountMapper extends Mapper { 16 | /** 17 | * 读取输入文件 18 | */ 19 | @Override 20 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 21 | //按行读取文件 22 | String line = value.toString(); 23 | // 20111230115903 262c9791427904631304a5eea4484bd5 音乐 4 1 http://mp3.baidu.com/ 24 | String[] words = line.split(" "); 25 | // 通过上下文将结果输出 userid 262c9791427904631304a5eea4484bd5 26 | context.write(new Text(words[1]), new LongWritable(Long.parseLong(words[1]))); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/mr/wordcount/WordCountReducer.java: -------------------------------------------------------------------------------- 1 | package com.tools.hadoop.mr.wordcount; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.apache.hadoop.mapreduce.Reducer; 6 | 7 | import java.io.IOException; 8 | 9 | public class WordCountReducer extends Reducer { 10 | /** 11 | * 12 | */ 13 | @Override 14 | protected void reduce(Text key, Iterable values, Context context) 15 | throws IOException, InterruptedException { 16 | Long count = 0L; 17 | for (LongWritable value : values) { 18 | count += value.get(); 19 | } 20 | //统计结果的输出 21 | context.write(key, new LongWritable(count)); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hadoop/集群部署文档.md: -------------------------------------------------------------------------------- 1 | # 部署文档 2 | 3 | ## 服务器集群 4 | 5 | hostname | IP | user | password | path | os | 6 | ---------|----|-------|---------|-------|-----| 7 | node01 | 192.168.218.110 | hadoop | java | /hadoop | Centos7 8 | node02 | 192.168.218.120 | hadoop | java | /hadoop | Centos7 9 | node03 |192.168.218.130| hadoop | java | /hadoop | Centos7 10 | 11 | ## 集群规划 12 | 13 | node01 | node02 | node03 14 | ---------|---------|-------| 15 | namenode | namenode | 16 | datanode | datanode | datanode 17 | zookeeper | zookeeper | zookeeper 18 | ResourceManage | ResourceManage 19 | NodeManage | NodeManage | NodeManage 20 | JournalNode | JournalNode | JournalNode 21 | DFSZKFailoverController | DFSZKFailoverController | 22 | 23 | ## 组件版本 24 | 25 | 组件 | 版本 | 下载地址 26 | -----|------|------| 27 | Centos7 | CentOS-7-x86_64-DVD-1908.iso | [linux服务器下载地址](https://mirrors.aliyun.com/centos/7/isos/x86_64/CentOS-7-x86_64-DVD-1908.iso) 28 | JDK | jdk1.8.0_141 | [Jdk](https://www.oracle.com/technetwork/cn/java/javase/downloads/jdk8-downloads-2133151-zhs.html) 29 | Zookeeper | zookeeper-3.4.5-cdh5.14.2 | [zookeeper下载地址](https://www.baidu.com) 30 | Hadoop | hadoop-2.6.0-cdh5.14.2 | [hadoop下载地址](http://archive.cloudera.com/cdh5/cdh/5/) 31 | 32 | ## 虚拟机安装跳过 33 | 34 | ## 网卡配置 35 | 36 | mac参考以下链接 37 | 38 | [Mac VMware Fusion 
CentOS7配置静态IP](https://www.cnblogs.com/itbsl/p/10998696.html) 39 | 40 | windows参考以下链接 41 | 42 | [windows配置链接](https://www.baidu.com) 43 | 44 | ## 配置服务器 45 | 46 | 从此步开始,开始进行服务器基础环境配置。 47 | 48 | ### 将集群中所有的机器hostname ip 映射,添加到/etc/hosts 49 | 50 | 映射后集群间将不在需要使用ip 51 | 52 | ```shell 53 | 192.168.218.110 node01 54 | 192.168.218.120 node02 55 | 192.168.218.130 node03 56 | ``` 57 | 58 | ### 关闭防火墙 59 | 60 | 关闭防火墙主要是为了集群机器间的通信 61 | 62 | ```shell 63 | # 关闭防火墙 64 | systemctl disable firewalld.service 65 | 66 | # 查看防火墙状态 67 | systemctl status firewalld.service 68 | # 已关闭将输出 Active: inactive (dead) 69 | ``` 70 | 71 | ### 配置网卡及主机名 72 | 73 | 编辑文件`vim /etc/sysconfig/network-scripts/ifcfg-eth0` ,写入如下配置: 74 | 75 | ```shell 76 | TYPE=Ethernet 77 | PROXY_METHOD=none 78 | BROWSER_ONLY=no 79 | DEFROUTE=yes 80 | IPV4_FAILURE_FATAL=no 81 | IPV6INIT=yes 82 | IPV6_AUTOCONF=yes 83 | IPV6_DEFROUTE=yes 84 | IPV6_FAILURE_FATAL=no 85 | IPV6_ADDR_GEN_MODE=stable-privacy 86 | NAME=eth0 87 | DEVICE=eth0 88 | IPV6_PRIVACY=no 89 | PREFIX=24 90 | 91 | ## 下边的几项配置是修改的 92 | #UUID=f22334e3-05d1-450e-a50a-1da9f5f27915 93 | ONBOOT=yes 94 | BOOTPROTO=static 95 | IPADDR=192.168.218.110 # 当前机器的ip 96 | GATEWAY=192.168.218.2 # 路由 要求网段一直 218 数字与ip的一样 97 | DNS1=192.168.218.2 # 同上 98 | ``` 99 | 100 | 配置主机名编辑文件`/etc/hostname`,添加下列 101 | 102 | ```shell 103 | node01 104 | ``` 105 | 106 | 配置完成后,重启网络服务。 107 | 108 | ```shell 109 | service network restart 110 | ``` 111 | 112 | ### 同步服务器时间 113 | 114 | ```shell 115 | # 安装ntpdate 116 | yum -y install ntpdate 117 | 118 | 119 | # 安装完成执行命令 120 | crontab -e 121 | 122 | 123 | # 此时进入文本编辑模式 使用 i 插入下列命令 124 | */1 * * * * /usr/sbin/ntpdate time1.aliyun.com 125 | # 填写完成后,输入 :wq 保存退出 126 | ``` 127 | 128 | ### 添加用户 129 | 130 | 按步骤执行以下命令 131 | 132 | ```shell 133 | # 添加用户组 134 | groupadd hadoop 135 | 136 | 137 | # 创建用户并添加到hadoop组中 138 | useradd -g hadoop hadoop 139 | 140 | 141 | # 使用id命令查看hadoop用户组和hadoop用户创建是否成功 142 | id hadoop 143 | # 正常输出 uid=1000(hadoop) gid=1000(hadoop) groups=1000(hadoop) 144 | 145 | 146 | # 设置hadoop用户密码为hadoop 147 | passwd hadoop 148 | 149 | ``` 150 | 151 | ### 切换到hadoop用户!! 152 | ### 切换到hadoop用户!! 153 | ### 切换到hadoop用户!! 154 | 155 | **谨记:从这里开始未声明使用root用户,默认都是用hadoop用户操作!!!** 156 | **谨记:从这里开始未声明使用root用户,默认都是用hadoop用户操作!!!** 157 | **谨记:从这里开始未声明使用root用户,默认都是用hadoop用户操作!!!** 158 | 159 | ```shell 160 | su - hadoop 161 | ``` 162 | 163 | ### 创建应用安装包以及数据存储目录 164 | 165 | ```java 166 | mkdir -p /hadoop/soft # 软件压缩包存放目录 167 | mkdir -p /hadoop/install # 软件解压后存放目录 168 | mkdir -p /hadoop/datadir # 各应用的数据存放目录 169 | chown -R hadoop:hadoop /hadoop # 将文件夹权限更改为hadoop用户 170 | ``` 171 | 172 | ### 上传安装包以及解压 173 | 174 | #### 上传 175 | 176 | 根据下载链接将需要组件下载到宿主机,由宿主机上传到虚拟机中 177 | 178 | **注意**:这里上传时要使用`hadoop`用户,不然还需要更改文件所属用户!! 179 | 180 | 上传路径为 `/hadoop/soft`~ 181 | 182 | 至于用什么方式,sftp、scp或其他工具都可! 183 | 184 | #### 解压 185 | 186 | 使用`hadoop`用户登录,解压命令直接解压即可 187 | 188 | **注意**:一定要用`hadoop`用户!!! 
189 | 190 | ```shell 191 | tar -xzvf hadoop-2.6.0-cdh5.14.2.tar.gz -C /hadoop/install/ 192 | ``` 193 | 194 | ### 配置jdk 195 | 196 | jdk可选择配置全局,也可以选择配置只针对`hadoop`用户。 197 | 198 | 这里我选择配置只针对`hadoop`用户~ 199 | 200 | 命令 `vim ~/.bash_profile` 201 | 202 | ```shell 203 | export JAVA_HOME=/hadoop/install/jdk1.8.0_141 204 | 205 | PATH=$PATH:$HOME/bin:$JAVA_HOME/bin 206 | ``` 207 | 208 | 修改完成使用命令 `source ~/.bash_profile`,更新用户环境变量。 209 | 210 | **验证环境**: 211 | 212 | ```shell 213 | java -verison 214 | 215 | # 正常输出。jdk版本 216 | # 错误输出 找不到命令 217 | ``` 218 | 219 | ## 配置zookeeper 220 | 221 | zookeeper的配置较为简单,只需要添加两个文件即可 222 | 223 | 第一个文件 zoo.cfg,命令 `vim zoo.cfg` 224 | 225 | ```shell 226 | tickTime=2000 227 | initLimit=10 228 | syncLimit=5 229 | clientPort=2181 230 | 231 | # 路径需要根据你的真实情况进行修改 232 | dataDir=/hadoop/datadir/zookeeper/ 233 | # 只修改你的主机hostname就可以,我这里三台机器命名为,`node01`、`node02`、`node03` 234 | server.1=node01:2888:3888 235 | server.2=node02:2888:3888 236 | server.3=node03:2888:3888 237 | ``` 238 | 239 | 第二个文件 myid,进入第一个配置文件中`dataDir`配置的目录,命令 `vim myid`,添加 `1`,即可(**这块每台机器不一样,在我们克隆虚拟机镜像后 240 | 需要手动将其修改!!稍后介绍**)。 241 | 242 | ## 配置hadoop 243 | 244 | **hadoop的配置文件不需要区分节点**,也就是说每个几点的配置文件都是相同的,所以我们在克隆虚拟机镜像前先将其配置好, 245 | 这样在克隆镜像后尽量最小的配置文件改动! 246 | 247 | ### 配置环境变量(参考jdk配置) 248 | 249 | ```shell 250 | export HADOOP_HOME=/hadoop/install/hadoop-2.6.0-cdh5.14.2 251 | 252 | PATH=$PATH:$HOME/bin:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin 253 | ``` 254 | 255 | 修改完成使用命令 `source ~/.bash_profile`,更新用户环境变量。 256 | 257 | ### 配置xml 258 | 259 | 需要修改的xml一共四个,都存放在`/hadoop/install/hadoop-2.6.0-cdh5.14.2/etc/hadoop/`目录下,`core-site.xml`、`hdfs-site.xml`、`yarn-site.xml`、`mapred-site.xml`。 260 | 261 | #### core-site.xml 262 | 263 | [获取core-site.xml](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/hadoop/config/core-site.xml) 264 | 265 | #### hdfs-site.xml 266 | 267 | [获取hdfs-site.xml](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/hadoop/config/hdfs-site.xml) 268 | 269 | #### yarn-site.xml 270 | 271 | [获取yarn-site.xml](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/hadoop/config/yarn-site.xml) 272 | 273 | #### mapred-site.xml 274 | 275 | [获取mapred-site.xml](https://github.com/sev7e0/bigdata-practice/blob/master/src/main/java/com/tools/hadoop/config/mapred-site.xml) 276 | 277 | ### 更改slaves 278 | 279 | `slaves`文件同样存在`/hadoop/install/hadoop-2.6.0-cdh5.14.2/etc/hadoop/`目录中, 280 | 281 | ```shell 282 | # vi slaves 283 | #将localhost这一行删除掉,添加下边三个节点 284 | node01 285 | node02 286 | node03 287 | ``` 288 | 289 | ### 手动创建hadoop所需数据目录 290 | 291 | 下边给出命令,直接整体复制执行即可,若你修改了路径,则需要对应的修改。 292 | 293 | ```shell 294 | mkdir -p /hadoop/datadir/hadoop/tempDatas 295 | mkdir -p /hadoop/datadir/hadoop/namenodeDatas 296 | mkdir -p /hadoop/datadir/hadoop/datanodeDatas 297 | mkdir -p /hadoop/datadir/hadoop/dfs/nn/edits 298 | mkdir -p /hadoop/datadir/hadoop/dfs/snn/name 299 | mkdir -p /hadoop/datadir/hadoop/dfs/nn/snn/edits 300 | mkdir -p /hadoop/datadir/hadoop/yarn/local 301 | mkdir -p /hadoop/datadir/hadoop/journal 302 | ``` 303 | 304 | **注意**:有的同学这块没有注意到上边提到的`hadoop`用户的读写权限,或者用了root创建,导致目录无法写入的异常。该路径一定要属于`hadoop`用户!!! 305 | 306 | ## 复制虚拟机镜像 307 | 308 | 这一步直接关机完整克隆就好了 309 | 310 | **注意**:有的同学复制镜像的同时把虚拟机的mac地址也复制了,这样将会导致其他两台启动后无法使用,若mac地址相同, 311 | 那么重新生成一个mac地址。 312 | 313 | ## 更改其他两台hostname、ip 314 | 315 | 参考[配置网卡及主机名](#jump) 316 | 317 | ## 启动每一个节点虚拟机 318 | 319 | 启动每一台虚拟机! 
320 | 321 | ## 配置免密登录 322 | 323 | Linux免密登录,本质上是使用了`公钥登录`。原理很简单,就是用户将自己的`公钥`储存在远程主机上。登录的时候,远程主机会向用户发送一段随机字符串,用户用自己的`私钥`加密后,再发回来。远程主机用事先储存的`公钥`进行解密,如果成功,就证明用户是可信的,直接允许登录shell,不再要求密码。 324 | 325 | **注意**:免密登录是针对每一个不同用户的,所以我们一定要在`hadoop`用户下执行。以下命令要在每一台机器上都执行~~~ 326 | 327 | ```shell 328 | ## 生成密钥 329 | ## 期间需要输入几次回车,直接回车即可 330 | ssh-keygen -t rsa 331 | 332 | ## 发送自己的公钥到每一台机器上,包括自己本身 333 | ## 由于每条命令都需要输入对方的密码,所以要一条一条的执行!!! 334 | ssh-copy-id -i ~/.ssh/id_rsa.pub node01 335 | ssh-copy-id -i ~/.ssh/id_rsa.pub node02 336 | ssh-copy-id -i ~/.ssh/id_rsa.pub node03 337 | ``` 338 | 339 | **注意**:一定要验证是否成功,在每台机器上相互`ssh`不需要密码就能登录,那么就说明免密登录配置成功!!! 340 | 341 | ```shell 342 | ssh node01 343 | ``` 344 | 345 | ## zookeeper启动 346 | 347 | ### 手动启动每一台节点 348 | 349 | **注意**:在启动前,我们要把刚刚的zookeeper配置中myid更改一下,才可以启动!!! 350 | 351 | 不同的机器对应不同的myid,从下边配置中获取,node01对应1,以此类推。 352 | 353 | ```shell 354 | server.1=node01:2888:3888 355 | server.2=node02:2888:3888 356 | server.3=node03:2888:3888 357 | ``` 358 | 359 | ```shell 360 | # 启动zk 361 | # 在每一台机器上执行 362 | /hadoop/install/zookeeper-3.4.5-cdh5.14.2/bin/zkServer.sh start 363 | # 检查状态 364 | /hadoop/install/zookeeper-3.4.5-cdh5.14.2/bin/zkServer.sh status 365 | ``` 366 | 367 | ### 脚本启动所有节点 368 | 369 | ```shell 370 | #!/bin/bash --login 371 | 372 | zookeeper=$1 373 | path=$2 374 | command=$3 375 | 376 | A=start status stop 377 | 378 | start(){ 379 | echo "$1 zookeeper on $2" 380 | ssh -l hadoop $2 "$3 $1" 381 | } 382 | 383 | if [ "$zookeeper" == "" ] || [ "$command" == "" ];then 384 | echo "usage:'node01 node02 node03' ./zkServer.sh [start status stop]" 385 | exit 0 386 | fi 387 | 388 | # 判断是否为支持的命令 389 | for c in $A 390 | do 391 | if [ "$command" != "$c" ];then 392 | echo "当前只支持:[start status stop]命令" 393 | exit 0 394 | fi 395 | done 396 | 397 | if [ "$command" != "" ];then 398 | for zk in $zookeeper 399 | do 400 | start $command $zk $path 401 | done 402 | else 403 | echo "请输入正确命令" 404 | echo "'node01 node02 node03' ./zkServer.sh [start status stop]" 405 | fi 406 | ``` 407 | 408 | 启动只需要在主节点执行脚本即可!!! 409 | 410 | ```shell 411 | ./zkcluster_run.sh 'node01 node02 node03' /hadoop/install/zookeeper-3.4.5-cdh5.14.2/bin/zkServer.sh start 412 | ``` 413 | 414 | ## hadoop格式化并启动 415 | 416 | ### 格式化namenode 417 | 418 | 初始化的目的就是为了hdfs的元数据信息的初始化。 419 | 420 | **注意:** NameNode格式化只能在node01执行一次,不然会导致集群启动失败,!!!! 421 | 422 | 命令 423 | 424 | ```shell 425 | hdfs namenode -format 426 | ``` 427 | 428 | 成功的标志: 429 | 430 | ```log 431 | 19/08/23 04:32:34 INFO namenode.NameNode: STARTUP_MSG: 432 | /************************************************************ 433 | STARTUP_MSG: Starting NameNode 434 | STARTUP_MSG: user = hadoop 435 | STARTUP_MSG: host = ...... 436 | STARTUP_MSG: args = [-format] 437 | STARTUP_MSG: version = 2.6.0-cdh5.14.2 438 | #显示格式化成功。。。 439 | cdh5.14.2/hadoopDatas/namenodeDatas has been successfully formatted. 440 | 19/08/23 04:32:35 INFO common.Storage: Storage directory /hadoop/install/hadoop-2.6.0-cdh5.14.2/hadoopDatas/dfs/nn/edits has been successfully formatted. 441 | 19/08/23 04:32:35 INFO namenode.FSImageFormatProtobuf: Saving image file /hadoop/install/hadoop-2.6.0-cdh5.14.2/hadoopDatas/namenodeDatas/current/fsimage.ckpt_0000000000000000000 using no compression 442 | 19/08/23 04:32:35 INFO namenode.FSImageFormatProtobuf: Image file /hadoop/install/hadoop-2.6.0-cdh5.14.2/hadoopDatas/namenodeDatas/current/fsimage.ckpt_0000000000000000000 of size 323 bytes saved in 0 seconds. 
443 | 19/08/23 04:32:35 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0 444 | 19/08/23 04:32:35 INFO util.ExitUtil: Exiting with status 0 445 | 19/08/23 04:32:35 INFO namenode.NameNode: SHUTDOWN_MSG: 446 | #此处省略部分日志 447 | /************************************************************ 448 | SHUTDOWN_MSG: Shutting down NameNode at ..... 449 | ************************************************************/ 450 | ``` 451 | 452 | ### 同步namenode 453 | 454 | 在master的NameNode启动之后,我们进行对NameNode的数据同步 455 | 在standby-master(也就是我们node02)输入以下命令,输出的日志和上边的相仿 456 | 457 | ```shell 458 | hdfs namenode -bootstrapStandby 459 | ``` 460 | 461 | 如上步骤都顺利的话接下来就可以启动集群了! 462 | 463 | ### 启动集群 464 | 465 | 两种方式~ 466 | 467 | ```shell 468 | start-all.sh 469 | # 不过这种方式官方已经不在推荐了 470 | ``` 471 | 472 | 可以使用如下启动 473 | 474 | ```shell 475 | # 启动hdfs 476 | start-dfs.sh 477 | # 启动yarn 478 | start-yarn.sh 479 | ``` 480 | 481 | ### 查看进程 482 | 483 | node01大概长这样,其他两台节点参考[集群规划](#Plan),部署了的那么一定存在进程,若不存在进程,那么需要查看日志解决问题~ 484 | 485 | ```shell 486 | 12707 DFSZKFailoverController 487 | 12820 ResourceManager 488 | 12327 DataNode 489 | 12521 JournalNode 490 | 12220 NameNode 491 | 12941 NodeManager 492 | 1578 QuorumPeerMain # zookeeper进程,其余全都是hadoop进程 493 | ``` 494 | 495 | ### 查看webUI 496 | 497 | 启动完成后可以通过webUI查看集群的信息,打开下边链接即可查看!! 498 | 499 | 两个节点都可以查看,要确保一个为active,另一个为standby的状态! 500 | 501 | [node01:50070](http://node01:50070) 502 | [node02:50070](http://node02:50070) 503 | 504 | ## 常见问题 505 | 506 | ### 不小心多次格式化namenode 507 | 508 | 若不小心在每台机器上都执行了`hdfs namenode -format`,此时每台节点的集群id将会不一致会导致其他机器无法加入集群! 509 | 510 | **解决办法:**清空[创建的每一个数据目录](#mkdir)!重新执行`hdfs namenode -format`即可!切记!!!只在node01上执行,执行完后要在node02[同步](#bootstrap) 511 | 512 | 513 | ### yarn启动异常 514 | 515 | ```log 516 | 2019-09-30 18:15:49,231 FATAL org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Error starting ResourceManager 517 | org.apache.hadoop.HadoopIllegalArgumentException: Configuration doesn't specify yarn.resourcemanager.cluster-id 518 | at org.apache.hadoop.yarn.conf.YarnConfiguration.getClusterId(YarnConfiguration.java:1785) 519 | at org.apache.hadoop.yarn.server.resourcemanager.EmbeddedElectorService.serviceInit(EmbeddedElectorService.java:82) 520 | at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) 521 | at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:107) 522 | at org.apache.hadoop.yarn.server.resourcemanager.AdminService.serviceInit(AdminService.java:145) 523 | at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) 524 | at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:107) 525 | at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceInit(ResourceManager.java:276) 526 | at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) 527 | at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:1309) 528 | ``` 529 | 530 | **解决办法:**在yarn-site.xml中配置一个id,如下。 531 | 532 | ```xml 533 | 534 | yarn.resourcemanager.cluster-id 535 | cluster1 536 | 537 | ``` 538 | 539 | ### ZKFailoverController启动失败问题 540 | 541 | #### 异常一 542 | 543 | ```log 544 | 2019-09-30 18:15:45,010 FATAL org.apache.hadoop.hdfs.tools.DFSZKFailoverController: Got a fatal error, exiting now 545 | java.lang.IllegalArgumentException: Missing required configuration 'ha.zookeeper.quorum' for ZooKeeper quorum 546 | at com.google.common.base.Preconditions.checkArgument(Preconditions.java:115) 547 | at 
org.apache.hadoop.ha.ZKFailoverController.initZK(ZKFailoverController.java:340) 548 | at org.apache.hadoop.ha.ZKFailoverController.doRun(ZKFailoverController.java:190) 549 | at org.apache.hadoop.ha.ZKFailoverController.access$000(ZKFailoverController.java:60) 550 | at org.apache.hadoop.ha.ZKFailoverController$1.run(ZKFailoverController.java:171) 551 | at org.apache.hadoop.ha.ZKFailoverController$1.run(ZKFailoverController.java:167) 552 | at org.apache.hadoop.security.SecurityUtil.doAsLoginUserOrFatal(SecurityUtil.java:444) 553 | at org.apache.hadoop.ha.ZKFailoverController.run(ZKFailoverController.java:167) 554 | at org.apache.hadoop.hdfs.tools.DFSZKFailoverController.main(DFSZKFailoverController.java:192) 555 | ``` 556 | 557 | **解决办法:** 558 | 559 | - 确认是否配置了 560 | 561 | ```xml 562 | 563 | 564 | ha.zookeeper.quorum 565 | node01:2181,node02:2181,node03:2181 566 | 567 | ``` 568 | 569 | - 检查服务器时间是否同步 570 | 571 | [如何同步服务器时间](#ntpdate) 572 | 573 | **注意:** 同步需要在root用户下。 574 | 575 | #### 异常二 576 | 577 | ```log 578 | 2019-09-30 15:42:05,418 FATAL org.apache.hadoop.ha.ZKFailoverController: Unable to start failover controller. Parent znode does not exist. 579 | Run with -formatZK flag to initialize ZooKeeper. 580 | ``` 581 | 582 | **解决办法:** 此刻以为这你的hadoop节点还没有注册到zookeeper中,需要初始化。 583 | 584 | ```shell 585 | # 执行命令进行初始化 586 | hdfs zkfc -formatZK 587 | ``` 588 | 589 | 重新起动集群即可。 -------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/HBaseFilter.java: -------------------------------------------------------------------------------- 1 | package com.tools.hbase; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.hadoop.conf.Configuration; 5 | import org.apache.hadoop.hbase.CellUtil; 6 | import org.apache.hadoop.hbase.TableName; 7 | import org.apache.hadoop.hbase.client.Connection; 8 | import org.apache.hadoop.hbase.client.ConnectionFactory; 9 | import org.apache.hadoop.hbase.client.Scan; 10 | import org.apache.hadoop.hbase.client.Table; 11 | import org.apache.hadoop.hbase.filter.Filter; 12 | import org.apache.hadoop.hbase.filter.PrefixFilter; 13 | import org.apache.hadoop.hbase.filter.RandomRowFilter; 14 | import org.apache.hadoop.hbase.util.Bytes; 15 | 16 | import java.io.IOException; 17 | 18 | @Slf4j 19 | public class HBaseFilter { 20 | 21 | private static Connection connection; 22 | static { 23 | Configuration configuration = new Configuration(); 24 | configuration.set("hbase.zookeeper.quorum", "spark01"); 25 | configuration.set("hbase.zookeeper.property.clientPort", "2181"); 26 | try { 27 | connection = ConnectionFactory.createConnection(configuration); 28 | } catch (IOException e) { 29 | log.error(e.getMessage()); 30 | } 31 | } 32 | 33 | public static void main(String[] args) throws IOException { 34 | Table table = connection.getTable(TableName.valueOf(HBaseTestUtil.getTableName("table_20191108"))); 35 | 36 | // rowId前缀过滤 37 | log.warn("PrefixFilter"); 38 | Filter pf = new PrefixFilter(Bytes.toBytes("1")); 39 | Scan scan00 = new Scan().setFilter(pf); 40 | table.getScanner(scan00).forEach(res-> log.info(Bytes.toString(res.getValue(HBaseTestUtil.getFamilyName(null), "data_stamp".getBytes())))); 41 | 42 | //随机百分比过滤 43 | log.warn("RandomRowFilter"); 44 | Filter randomRowFilter = new RandomRowFilter(0.003f); 45 | Scan scan01 = new Scan().setFilter(randomRowFilter); 46 | table.getScanner(scan01).forEach(res-> res.listCells().forEach(cell -> log.info("{} : {} : {} : {} : {}", 47 | Bytes.toString(CellUtil.cloneRow(cell)), 48 | 
Bytes.toString(CellUtil.cloneFamily(cell)), 49 | Bytes.toString(CellUtil.cloneQualifier(cell)), 50 | Bytes.toString(CellUtil.cloneValue(cell)), 51 | cell.getTimestamp()))); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/HBaseReadWrite.java: -------------------------------------------------------------------------------- 1 | package com.tools.hbase; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.hadoop.conf.Configuration; 5 | import org.apache.hadoop.hbase.HBaseConfiguration; 6 | import org.apache.hadoop.hbase.HColumnDescriptor; 7 | import org.apache.hadoop.hbase.HTableDescriptor; 8 | import org.apache.hadoop.hbase.TableName; 9 | import org.apache.hadoop.hbase.client.*; 10 | import org.apache.hadoop.hbase.util.Bytes; 11 | 12 | import java.io.IOException; 13 | 14 | @Slf4j 15 | public class HBaseReadWrite { 16 | 17 | private Connection connection = null; 18 | 19 | public static void main(String[] args) throws IOException { 20 | HBaseReadWrite readWrite = new HBaseReadWrite(); 21 | readWrite.init(); 22 | readWrite.creatTable(); 23 | for (int i = 0; i < 10000; i++) { 24 | long stamp = System.currentTimeMillis(); 25 | String data = "data_"+stamp; 26 | System.out.println("insert data :"+data+""); 27 | readWrite.insert(HBaseTestUtil.getTableName(null), String.valueOf(stamp), HBaseTestUtil.getFamilyName(null), "data_stamp".getBytes(), data); 28 | } 29 | 30 | readWrite.scan(HBaseTestUtil.getTableName(null), HBaseTestUtil.getFamilyName(null), "data_stamp"); 31 | } 32 | 33 | /** 34 | * 初始化连接 35 | * 36 | * @throws IOException 37 | */ 38 | private void init() throws IOException { 39 | Configuration configuration = HBaseConfiguration.create(); 40 | 41 | connection = ConnectionFactory.createConnection(configuration); 42 | } 43 | 44 | /** 45 | * 创建表 46 | * 47 | * @throws IOException 48 | */ 49 | private void creatTable() throws IOException { 50 | Admin admin = connection.getAdmin(); 51 | TableName tableName = TableName.valueOf(HBaseTestUtil.getTableName(null)); 52 | if (admin.tableExists(tableName)) { 53 | // hbase 在删除表之前要先 disable 54 | admin.disableTable(tableName); 55 | admin.deleteTable(tableName); 56 | } 57 | HTableDescriptor descriptor = new HTableDescriptor(tableName); 58 | descriptor.addFamily(new HColumnDescriptor(HBaseTestUtil.getFamilyName(null))); 59 | admin.createTable(descriptor); 60 | } 61 | 62 | /** 63 | * 插入数据 64 | * 65 | * @param tableN 表名 66 | * @param rowId row id 67 | * @param familyName 列族 68 | * @param qualifier 列 69 | * @param value 数据 70 | * @throws IOException 71 | */ 72 | private void insert(byte[] tableN, String rowId, byte[] familyName, byte[] qualifier, String value) throws IOException { 73 | TableName tableName = TableName.valueOf(tableN); 74 | Table table = connection.getTable(tableName); 75 | Put put = new Put(rowId.getBytes()); 76 | put.addColumn(familyName, qualifier, value.getBytes()); 77 | table.put(put); 78 | } 79 | 80 | /** 81 | * @param tableN 表名 82 | * @param familyN 列族 83 | * @param qualifier 列 84 | * @throws IOException 85 | */ 86 | private void scan(byte[] tableN, byte[] familyN, String qualifier) throws IOException { 87 | TableName tableName = TableName.valueOf(tableN); 88 | Table table = connection.getTable(tableName); 89 | Scan scan = new Scan(); 90 | ResultScanner scanner = table.getScanner(scan); 91 | scanner.forEach(data -> System.out.println((Bytes.toString(data.getValue(familyN, qualifier.getBytes()))))); 92 | } 93 | } 94 | 
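The write path above issues 10,000 individual `Put` RPCs, one per row. For bulk test data a `BufferedMutator` usually cuts the round trips noticeably, because mutations are buffered client-side and flushed in batches. A minimal sketch, assuming it is added as a method of `HBaseReadWrite` (it reuses the class's `connection` field, expects the table and family created by `creatTable()`, and needs an extra import of `org.apache.hadoop.hbase.client.BufferedMutator`):

```java
/**
 * Batched-insert sketch (assumption: lives inside HBaseReadWrite and the
 * target table/column family already exist, e.g. created by creatTable()).
 */
private void insertBatch(byte[] tableN, byte[] familyName, byte[] qualifier, int rows) throws IOException {
    try (BufferedMutator mutator = connection.getBufferedMutator(TableName.valueOf(tableN))) {
        for (int i = 0; i < rows; i++) {
            long stamp = System.currentTimeMillis();
            Put put = new Put(Bytes.toBytes(stamp + "_" + i));
            put.addColumn(familyName, qualifier, Bytes.toBytes("data_" + stamp));
            // mutate() only buffers the Put; the client flushes automatically when the buffer fills
            mutator.mutate(put);
        }
        // push whatever is still buffered before the mutator is closed
        mutator.flush();
    }
}
```

The same effect can be had with `table.put(List<Put>)`; `BufferedMutator` is simply the more convenient choice when the total batch size is not known up front.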
-------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/HBaseTestUtil.java: -------------------------------------------------------------------------------- 1 | package com.tools.hbase; 2 | 3 | import org.apache.hadoop.hbase.util.Bytes; 4 | 5 | import java.time.LocalDate; 6 | import java.time.format.DateTimeFormatter; 7 | import java.util.Objects; 8 | 9 | public class HBaseTestUtil { 10 | 11 | public static void main(String[] args) { 12 | System.out.println(Bytes.toString(getTableName(null))); 13 | } 14 | 15 | public static byte[] getTableName(String name) { 16 | String format = LocalDate.now().format(DateTimeFormatter.BASIC_ISO_DATE); 17 | return Objects.isNull(name) ? ("table_" + format).getBytes() : name.getBytes(); 18 | } 19 | 20 | public static byte[] getFamilyName(String name) { 21 | String format = LocalDate.now().format(DateTimeFormatter.BASIC_ISO_DATE); 22 | return Objects.isNull(name) ? ("family_" + format).getBytes() : name.getBytes(); 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/HBase读写的几种方式.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sev7e0/bigdata-practice/ffbdd93bd555fd388d4dd20ccc3379124a3eae5f/src/main/java/com/tools/hbase/HBase读写的几种方式.pdf -------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/Utils.java: -------------------------------------------------------------------------------- 1 | package com.tools.hbase; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.hbase.HBaseConfiguration; 7 | import org.apache.hadoop.hbase.TableName; 8 | import org.apache.hadoop.hbase.client.Connection; 9 | import org.apache.hadoop.hbase.client.ConnectionFactory; 10 | import org.apache.hadoop.hbase.client.RegionLocator; 11 | import org.apache.hadoop.hbase.client.Table; 12 | import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles; 13 | 14 | /** 15 | * HBase Utils Class 16 | */ 17 | public class Utils { 18 | 19 | /** 20 | * HBase Bulk Load method replace command line 21 | * 22 | * @param configuration hadoop config 23 | * @param hFilePath HFile path 24 | * @param table table name 25 | */ 26 | public static void doBulkLoad(Configuration configuration, String hFilePath, String table) { 27 | try { 28 | FileSystem fileSystem = FileSystem.newInstance(configuration); 29 | // add HBase config to Configuration object 30 | HBaseConfiguration.addHbaseResources(configuration); 31 | LoadIncrementalHFiles loadIncrementalHFiles = new LoadIncrementalHFiles(configuration); 32 | 33 | // create HBase connection 34 | try (Connection connection = ConnectionFactory.createConnection(configuration)) { 35 | Table connectionTable = connection.getTable(TableName.valueOf(table)); 36 | RegionLocator regionLocator = connection.getRegionLocator(connectionTable.getName()); 37 | // new client api for HBase 1.0.0+ 38 | loadIncrementalHFiles.doBulkLoad(new Path(hFilePath), connection.getAdmin(), connectionTable, regionLocator); 39 | } 40 | } catch (Exception e) { 41 | e.printStackTrace(); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/hdfs2hbase/HDFS2HBase.java: -------------------------------------------------------------------------------- 1 
| package com.tools.hbase.hdfs2hbase; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.hbase.client.Put; 6 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 7 | import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; 8 | import org.apache.hadoop.hbase.mapreduce.TableReducer; 9 | import org.apache.hadoop.hbase.util.Bytes; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.io.NullWritable; 12 | import org.apache.hadoop.io.Text; 13 | import org.apache.hadoop.mapreduce.Job; 14 | import org.apache.hadoop.mapreduce.Mapper; 15 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 16 | import java.io.IOException; 17 | 18 | public class HDFS2HBase { 19 | 20 | public static class HBaseMap extends Mapper{ 21 | @Override 22 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 23 | context.write(value,NullWritable.get()); 24 | } 25 | } 26 | 27 | public static class HBaseReduce extends TableReducer{ 28 | @Override 29 | protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { 30 | String[] data = key.toString().split("\t"); 31 | Put put = new Put(data[0].getBytes()); 32 | put.addColumn("info".getBytes(),"name".getBytes(),data[1].getBytes()); 33 | put.addColumn("info".getBytes(),"sex".getBytes(),data[2].getBytes()); 34 | put.addColumn("course".getBytes(),"match".getBytes(),data[3].getBytes()); 35 | put.addColumn("course".getBytes(),"chinese".getBytes(),data[4].getBytes()); 36 | context.write(new ImmutableBytesWritable(Bytes.toBytes(data[0])),put); 37 | } 38 | } 39 | 40 | 41 | public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 42 | Configuration configuration = new Configuration(); 43 | Job job = Job.getInstance(configuration); 44 | job.setJarByClass(HDFS2HBase.class); 45 | job.setInputFormatClass(TextInputFormat.class); 46 | TextInputFormat.addInputPath(job, new Path(args[0])); 47 | job.setMapperClass(HBaseMap.class); 48 | job.setMapOutputKeyClass(Text.class); 49 | job.setMapOutputValueClass(NullWritable.class); 50 | 51 | /** 52 | * reduce任务交由Hbase完善 53 | */ 54 | TableMapReduceUtil.initTableReducerJob(args[1],HBaseReduce.class,job); 55 | job.setNumReduceTasks(1); 56 | job.waitForCompletion(false); 57 | 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/hdfs2hbase/scores.txt: -------------------------------------------------------------------------------- 1 | 1001 lx m 97 93 2 | 1002 xm f 92 84 3 | 1003 bruce m 90 83 4 | 1100 tony m 95 76 5 | 1110 dl f 87 96 6 | 1005 xl m 85 78 7 | 1006 eh f 82 99 8 | 1009 tg f 75 95 9 | 2000 xingxing m 77 63 10 | 1007 xw m 99 65 11 | 1301 qy f 95 73 12 | 1303 qo f 88 79 13 | 1307 sn m 76 82 14 | 1402 sw f 92 91 15 | 1404 sp f 91 83 16 | 1408 wd f 83 94 17 | 1513 wf m 75 86 18 | 1515 xl f 77 85 19 | 1519 kl f 96 77 20 | 1520 zs m 82 79 21 | 1621 ls m 90 96 22 | 1624 ll m 61 94 23 | 1627 cm f 52 85 24 | 1629 cr f 79 81 25 | 1730 cz m 95 87 26 | 1733 hk f 93 56 27 | 1734 rk m 86 88 28 | 1739 zy m 84 99 29 | 1885 zf f 72 71 30 | 1887 gy f 71 86 -------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/processor/HBasePerson.java: -------------------------------------------------------------------------------- 1 | package com.tools.hbase.processor; 2 | 3 | import 
org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.hbase.HBaseConfiguration; 6 | import org.apache.hadoop.hbase.HColumnDescriptor; 7 | import org.apache.hadoop.hbase.HTableDescriptor; 8 | import org.apache.hadoop.hbase.TableName; 9 | import org.apache.hadoop.hbase.client.Connection; 10 | import org.apache.hadoop.hbase.client.ConnectionFactory; 11 | import org.apache.hadoop.hbase.client.Put; 12 | import org.apache.hadoop.hbase.client.Table; 13 | 14 | import java.io.IOException; 15 | 16 | public class HBasePerson { 17 | 18 | public static void main(String[] args) { 19 | 20 | Configuration configuration = HBaseConfiguration.create(); 21 | configuration.set("hbase.zookeeper.quorum", "spark01:2181,spark02:2181,spark03:2181"); 22 | configuration.set("hbase.table.sanity.checks", "false"); 23 | 24 | try (Connection connection = ConnectionFactory.createConnection(configuration)) { 25 | TableName tableName = TableName.valueOf("person"); 26 | if (!connection.getAdmin().tableExists(tableName)) { 27 | HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName); 28 | HColumnDescriptor info = new HColumnDescriptor("info"); 29 | hTableDescriptor.addFamily(info); 30 | Path path = new Path("hdfs://spark01:9000/bigdata-practice-0.jar"); 31 | hTableDescriptor.addCoprocessor(HBaseProcessor.class.getCanonicalName(), path, HBaseProcessor.PRIORITY_USER, null); 32 | connection.getAdmin().createTable(hTableDescriptor); 33 | } 34 | Put put = new Put("0002".getBytes()); 35 | put.addColumn("info".getBytes(), "name".getBytes(), "lishengnan".getBytes()); 36 | put.addColumn("info".getBytes(), "age".getBytes(), "18".getBytes()); 37 | try (Table person = connection.getTable(tableName)) { 38 | person.put(put); 39 | } 40 | } catch (IOException e) { 41 | e.printStackTrace(); 42 | } 43 | 44 | 45 | } 46 | 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hbase/processor/HBaseProcessor.java: -------------------------------------------------------------------------------- 1 | package com.tools.hbase.processor; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.hbase.*; 5 | import org.apache.hadoop.hbase.client.*; 6 | import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; 7 | import org.apache.hadoop.hbase.coprocessor.ObserverContext; 8 | import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; 9 | import org.apache.hadoop.hbase.regionserver.wal.WALEdit; 10 | 11 | import java.io.IOException; 12 | import java.util.List; 13 | 14 | /** 15 | * HBase协处理器实践 16 | * HBase提供四种协处理器: 17 | * 18 | * RegionObserver 针对get put delete等操作的 19 | * RegionServerObserver 针对RegionServer的 20 | * WALObserver 针对日志的如滚动、删除 21 | * MasterObserver 针对表结构的创建、修改、删除 22 | * 23 | * EndpointObserver 作用是将用户层的逻辑下推到数据层执行,将大量处理结果放在HBase中执行,需要手动调用 24 | * 25 | */ 26 | public class HBaseProcessor extends BaseRegionObserver { 27 | 28 | @Override 29 | public void prePut(ObserverContext e, Put put, WALEdit edit, Durability durability) throws IOException { 30 | Configuration configuration = HBaseConfiguration.create(); 31 | configuration.set("hbase.zookeeper.quorum", "spark01:2181,spark02:2181,spark03:2181"); 32 | try (Connection connection = ConnectionFactory.createConnection(configuration)) { 33 | TableName person_back = TableName.valueOf("person_back"); 34 | //表不存在时创建表 35 | if (!connection.getAdmin().tableExists(person_back)){ 36 | HColumnDescriptor columnDescriptor = new 
HColumnDescriptor("info"); 37 | HTableDescriptor hTableDescriptor = new HTableDescriptor(person_back); 38 | hTableDescriptor.addFamily(columnDescriptor); 39 | connection.getAdmin().createTable(hTableDescriptor); 40 | } 41 | List cells = put.get("info".getBytes(), "name".getBytes()); 42 | if (cells.isEmpty()) { 43 | return; 44 | } 45 | Cell cell = cells.get(0); 46 | Put put1 = new Put(put.getRow()); 47 | put1.add(cell); 48 | try (Table person = connection.getTable(person_back)) { 49 | person.put(put1); 50 | } 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hive/MyUDAF.java: -------------------------------------------------------------------------------- 1 | package com.tools.hive; 2 | 3 | import org.apache.hadoop.hive.ql.exec.UDAF; 4 | 5 | /** 6 | * Title: MyUDAF.java 7 | * description: TODO 8 | * 9 | * @author sev7e0 10 | * @version 1.0 11 | * @since 2020-05-06 22:35 12 | **/ 13 | 14 | public class MyUDAF extends UDAF { 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hive/MyUDF.java: -------------------------------------------------------------------------------- 1 | package com.tools.hive; 2 | 3 | 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.hadoop.hive.ql.exec.Description; 6 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 7 | import org.apache.hadoop.hive.ql.metadata.HiveException; 8 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 9 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 10 | import org.apache.hadoop.io.Text; 11 | 12 | import java.util.Objects; 13 | 14 | public class MyUDF extends GenericUDF { 15 | 16 | @Description( 17 | name = "用户自定义函数", 18 | value = "将json转化为自定义表结构", 19 | extended = "select MyUDF(data,'movie') from json;" 20 | ) 21 | public Text evaluate(final String input, String key) { 22 | if (Objects.isNull(input)) { 23 | return null; 24 | } 25 | JSONObject jsonObject = JSONObject.parseObject(input); 26 | return new Text(String.valueOf(jsonObject.get(key))); 27 | } 28 | 29 | public static void main(String[] args) { 30 | MyUDF myUDF = new MyUDF(); 31 | Text movie = myUDF.evaluate("{'movie':'1193','rate':'5','timeStamp':'978300760','uid':'1'}", "movie"); 32 | System.out.println(movie.toString()); 33 | } 34 | 35 | @Override 36 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 37 | return null; 38 | } 39 | 40 | @Override 41 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 42 | return null; 43 | } 44 | 45 | @Override 46 | public String getDisplayString(String[] children) { 47 | return null; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hive/MyUDTF.java: -------------------------------------------------------------------------------- 1 | package com.tools.hive; 2 | 3 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 4 | import org.apache.hadoop.hive.ql.metadata.HiveException; 5 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; 6 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 7 | 8 | /** 9 | * Title: MyUDTF.java 10 | * description: TODO 11 | * 12 | * @author sev7e0 13 | * @version 1.0 14 | * @since 2020-05-06 22:53 15 | **/ 16 | 17 | public class MyUDTF extends GenericUDTF { 18 | 19 | @Override 20 | public StructObjectInspector 
initialize(StructObjectInspector argOIs) throws UDFArgumentException { 21 | return super.initialize(argOIs); 22 | } 23 | 24 | @Override 25 | public void process(Object[] args) throws HiveException { 26 | 27 | } 28 | 29 | @Override 30 | public void close() throws HiveException { 31 | 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/tools/hive/sql/test.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sev7e0/bigdata-practice/ffbdd93bd555fd388d4dd20ccc3379124a3eae5f/src/main/java/com/tools/hive/sql/test.sql -------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/CustomPartitioner.java: -------------------------------------------------------------------------------- 1 | package com.tools.kafka; 2 | 3 | import org.apache.kafka.clients.producer.Partitioner; 4 | import org.apache.kafka.common.Cluster; 5 | 6 | import java.util.Map; 7 | import java.util.Random; 8 | 9 | public class CustomPartitioner implements Partitioner { 10 | @Override 11 | public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) { 12 | 13 | int size = cluster.availablePartitionsForTopic(topic).size(); 14 | Random random = new Random(100); 15 | return Math.abs(String.valueOf(random.nextInt()).hashCode()%size); 16 | } 17 | 18 | @Override 19 | public void close() { 20 | 21 | } 22 | 23 | @Override 24 | public void configure(Map configs) { 25 | 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/consumer/ConsumerCommitOffset.java: -------------------------------------------------------------------------------- 1 | package com.tools.kafka.consumer; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.kafka.clients.consumer.ConsumerConfig; 5 | import org.apache.kafka.clients.consumer.ConsumerRecord; 6 | import org.apache.kafka.clients.consumer.ConsumerRecords; 7 | import org.apache.kafka.clients.consumer.KafkaConsumer; 8 | import org.apache.kafka.common.serialization.StringDeserializer; 9 | 10 | import java.time.Duration; 11 | import java.util.Collections; 12 | import java.util.Properties; 13 | 14 | /** 15 | * 手动控制提交offset 16 | */ 17 | @Slf4j 18 | public class ConsumerCommitOffset { 19 | public static final String brokerList = "localhost:9092"; 20 | public static final String topic = "topic-1"; 21 | //新的group,相较于ConsumerQuickStart group-1分组,现在kafka是发布订阅模型 22 | public static final String groupId = "group-2"; 23 | public static final String out = "topic={} - partition={} - offset={} - value={}"; 24 | 25 | /** 26 | * 初始化配置 27 | * 28 | * @return 29 | */ 30 | private static Properties initProperties() { 31 | Properties properties = new Properties(); 32 | 33 | properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 34 | properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 35 | properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); 36 | properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId); 37 | 38 | //关闭kafka默认的自动提交offset,容易导致重复处理的问题 39 | properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false); 40 | return properties; 41 | } 42 | 43 | 44 | public static void main(String[] args) { 45 | 46 | try (KafkaConsumer consumer = new KafkaConsumer<>(initProperties())) { 47 | 
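// Auto-commit was disabled in initProperties(), so offsets are committed manually below:
// commitAsync() while records are being processed, plus one commitSync() in the
// finally block as a final safeguard so the last batch of offsets is not lost.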
consumer.subscribe(Collections.singletonList(topic)); 48 | try { 49 | while (true) { 50 | ConsumerRecords records = consumer.poll(Duration.ofMillis(1000)); 51 | // lo.info(out, 52 | // record.topic(), 53 | // record.partition(), 54 | // record.offset(), 55 | // record.value())); 56 | //异步提交offset 57 | for (ConsumerRecord record : records) { 58 | consumer.commitAsync(); 59 | } 60 | } 61 | } finally { 62 | //使用同步提交,做最后的把关 63 | consumer.commitSync(); 64 | } 65 | 66 | } 67 | } 68 | 69 | } 70 | 71 | -------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/consumer/ConsumerInterceptorTTL.java: -------------------------------------------------------------------------------- 1 | package com.tools.kafka.consumer; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.kafka.clients.consumer.ConsumerInterceptor; 5 | import org.apache.kafka.clients.consumer.ConsumerRecord; 6 | import org.apache.kafka.clients.consumer.ConsumerRecords; 7 | import org.apache.kafka.clients.consumer.OffsetAndMetadata; 8 | import org.apache.kafka.common.TopicPartition; 9 | 10 | import java.util.ArrayList; 11 | import java.util.HashMap; 12 | import java.util.List; 13 | import java.util.Map; 14 | 15 | /** 16 | * 自定义消费者拦截器 17 | *
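* onConsume keeps only the records whose timestamp is within the last 10 seconds (the TTL hard-coded below) and regroups the survivors by partition; onCommit simply prints the committed offset of each partition.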

18 | * 实现消息过期时间的功能 19 | */ 20 | @Slf4j 21 | public class ConsumerInterceptorTTL implements ConsumerInterceptor { 22 | @Override 23 | public ConsumerRecords onConsume(ConsumerRecords consumerRecords) { 24 | long timeMillis = System.currentTimeMillis(); 25 | 26 | Map>> map = new HashMap<>(); 27 | 28 | consumerRecords.partitions().forEach(topicPartition -> { 29 | List> recordList = consumerRecords.records(topicPartition); 30 | List> newConsumerRecords = new ArrayList<>(); 31 | 32 | recordList.forEach(record -> { 33 | if (timeMillis - record.timestamp() < 10 * 1000) { 34 | newConsumerRecords.add(record); 35 | } 36 | }); 37 | if (!newConsumerRecords.isEmpty()) { 38 | map.put(topicPartition, newConsumerRecords); 39 | } 40 | }); 41 | return new ConsumerRecords<>(map); 42 | } 43 | 44 | @Override 45 | public void close() { 46 | 47 | } 48 | 49 | @Override 50 | public void onCommit(Map map) { 51 | map.forEach((tp, offset) -> System.out.println("tp:{"+tp+"}--offset:{"+offset.offset()+"}")); 52 | } 53 | 54 | @Override 55 | public void configure(Map map) { 56 | 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/consumer/ConsumerReBalance.java: -------------------------------------------------------------------------------- 1 | package com.tools.kafka.consumer; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.kafka.clients.consumer.*; 5 | import org.apache.kafka.common.TopicPartition; 6 | import org.apache.kafka.common.serialization.StringDeserializer; 7 | 8 | import java.time.Duration; 9 | import java.util.*; 10 | 11 | /** 12 | * ReBalance监听器的用法,如何做到减少重复消费。 13 | */ 14 | @Slf4j 15 | public class ConsumerReBalance { 16 | public static final String brokerList = "localhost:9092"; 17 | public static final String topic = "topic-1"; 18 | //新的group,相较于ConsumerQuickStart group-1分组,现在kafka是发布订阅模型 19 | public static final String groupId = "group-3"; 20 | public static final String out = "topic={} - partition={} - offset={} - value={}"; 21 | 22 | /** 23 | * 初始化配置 24 | * 25 | * @return 26 | */ 27 | private static Properties initProperties() { 28 | Properties properties = new Properties(); 29 | 30 | properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 31 | properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 32 | properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); 33 | properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId); 34 | //添加自定义消费者拦截器,可以使用多个拦截器构成拦截链 35 | //当某个拦截器失败时,下一个会自动从上一个成功后的拦截器开始拦截 36 | properties.put(ConsumerConfig.INTERCEPTOR_CLASSES_CONFIG, ConsumerInterceptorTTL.class.getName()); 37 | 38 | //关闭kafka默认的自动提交offset,容易导致重复处理的问题 39 | properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false); 40 | return properties; 41 | } 42 | 43 | 44 | public static void main(String[] args) { 45 | 46 | try (KafkaConsumer consumer = new KafkaConsumer<>(initProperties())) { 47 | Map map = new HashMap<>(); 48 | consumer.subscribe(Collections.singletonList(topic), new ConsumerRebalanceListener() { 49 | @Override 50 | public void onPartitionsRevoked(Collection collection) { 51 | //同步提交 52 | consumer.commitSync(map); 53 | //亦可以选择存储到DB中。 54 | } 55 | 56 | @Override 57 | public void onPartitionsAssigned(Collection collection) { 58 | 59 | } 60 | }); 61 | 62 | try { 63 | while (true) { 64 | ConsumerRecords records = consumer.poll(Duration.ofMillis(1000)); 65 | records.forEach(record -> { 66 | log.info(out, 67 | 
record.topic(), 68 | record.partition(), 69 | record.offset(), 70 | record.value()); 71 | ///将offset存储到局部变量中,在ReBalance发生前,能够同步的提交offset避免重复消费 72 | map.put(new TopicPartition(record.topic(), record.partition()), 73 | new OffsetAndMetadata(record.offset() + 1)); 74 | } 75 | ); 76 | //异步提交offset 77 | consumer.commitAsync(map, null); 78 | } 79 | } finally { 80 | //使用同步提交,做最后的把关 81 | consumer.commitSync(); 82 | } 83 | 84 | } 85 | } 86 | 87 | } 88 | 89 | -------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/consumer/ConsumerThread.java: -------------------------------------------------------------------------------- 1 | package com.tools.kafka.consumer; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.kafka.clients.consumer.ConsumerRecord; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | import org.apache.kafka.clients.consumer.OffsetAndMetadata; 8 | import org.apache.kafka.common.TopicPartition; 9 | 10 | import java.time.Duration; 11 | import java.util.Collections; 12 | import java.util.List; 13 | import java.util.Map; 14 | import java.util.Properties; 15 | import java.util.concurrent.ArrayBlockingQueue; 16 | import java.util.concurrent.ExecutorService; 17 | import java.util.concurrent.ThreadPoolExecutor; 18 | import java.util.concurrent.TimeUnit; 19 | 20 | /** 21 | * 客户端消费 多线程方式实现 22 | */ 23 | @Slf4j 24 | public class ConsumerThread extends Thread { 25 | private KafkaConsumer kafkaConsumer; 26 | 27 | private ExecutorService executorService; 28 | 29 | private int threadNum; 30 | 31 | 32 | public ConsumerThread(Properties properties, String topic, int threadNum) { 33 | kafkaConsumer = new KafkaConsumer<>(properties); 34 | kafkaConsumer.subscribe(Collections.singletonList(topic)); 35 | this.threadNum = threadNum; 36 | 37 | executorService = new ThreadPoolExecutor( 38 | threadNum, 39 | threadNum, 40 | 0L, 41 | TimeUnit.MILLISECONDS, 42 | new ArrayBlockingQueue<>(1000), 43 | new ThreadPoolExecutor.CallerRunsPolicy()); 44 | } 45 | 46 | @Override 47 | public void run() { 48 | try { 49 | while (true) { 50 | ConsumerRecords records = kafkaConsumer.poll(Duration.ofMillis(100)); 51 | 52 | if (!records.isEmpty()) { 53 | executorService.submit(new RecordHandler(records)); 54 | } 55 | synchronized (RecordHandler.offsets) { 56 | if (!RecordHandler.offsets.isEmpty()) { 57 | kafkaConsumer.commitSync(RecordHandler.offsets, null); 58 | RecordHandler.offsets.clear(); 59 | } 60 | } 61 | } 62 | } catch (Exception e) { 63 | log.error(e.getMessage()); 64 | } finally { 65 | kafkaConsumer.close(); 66 | } 67 | 68 | } 69 | } 70 | 71 | class RecordHandler extends Thread { 72 | private ConsumerRecords records; 73 | 74 | public static Map offsets; 75 | 76 | public RecordHandler(ConsumerRecords records) { 77 | this.records = records; 78 | } 79 | 80 | @Override 81 | public void run() { 82 | records.partitions() 83 | .forEach(partition -> { 84 | List> record = records.records(partition); 85 | 86 | long lastConsumerOffset = record.get(record.size() - 1).offset(); 87 | 88 | synchronized (offsets) { 89 | if (!offsets.containsKey(partition)) { 90 | offsets.put(partition, new OffsetAndMetadata(lastConsumerOffset + 1)); 91 | } else { 92 | long position = offsets.get(partition).offset(); 93 | if (position < lastConsumerOffset + 1) { 94 | offsets.put(partition, new OffsetAndMetadata(lastConsumerOffset + 1)); 95 | } 96 | } 97 | } 98 | }); 99 | } 100 | } 
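ConsumerThread above is only the worker: nothing in this package wires it up, and the shared `RecordHandler.offsets` map is declared but never assigned, so the `synchronized (RecordHandler.offsets)` blocks would throw a NullPointerException as written. A small, hypothetical launcher sketch (broker address, group id, topic and thread count are placeholder assumptions, not values taken from this repo) that initializes the map and starts one consumer thread:

```java
package com.tools.kafka.consumer;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.HashMap;
import java.util.Properties;

/**
 * Hypothetical launcher for ConsumerThread; all property values below are placeholders.
 */
public class ConsumerThreadMain {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "group-thread");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        // ConsumerThread commits offsets itself via commitSync(offsets), so auto-commit stays off
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);

        // RecordHandler.offsets is never initialized in the class above; assign it first,
        // otherwise the synchronized (RecordHandler.offsets) block would NPE.
        RecordHandler.offsets = new HashMap<>();

        // one poll thread feeding a handler pool sized to the machine's cores
        new ConsumerThread(props, "topic-1", Runtime.getRuntime().availableProcessors()).start();
    }
}
```

Since several handler threads touch the same map, keeping the synchronized blocks as in the original (or swapping in a ConcurrentHashMap) is what makes the shared offset bookkeeping safe.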
-------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/producer/ProducerRandomInt.java: -------------------------------------------------------------------------------- 1 | package com.tools.kafka.producer; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.kafka.clients.producer.KafkaProducer; 5 | import org.apache.kafka.clients.producer.ProducerConfig; 6 | import org.apache.kafka.clients.producer.ProducerRecord; 7 | import org.apache.kafka.common.serialization.StringSerializer; 8 | 9 | import java.util.Properties; 10 | import java.util.Random; 11 | 12 | /** 13 | * 向kafka中发送随机数 14 | */ 15 | @Slf4j 16 | public class ProducerRandomInt { 17 | public static final String brokerList = "spark01:9092"; 18 | public static final String topic = "randomCount_new"; 19 | 20 | /** 21 | * 初始化参数 22 | * 23 | * @return 24 | */ 25 | private static Properties initProperties() { 26 | Properties properties = new Properties(); 27 | 28 | //生产者需要序列化器将对象转换成字节数组才能通过网络发送给kafka服务端 29 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 30 | //properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 31 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 32 | 33 | //acks它代表消息确认机制 34 | properties.put("acks", "all"); 35 | //重试的次数 36 | properties.put("retries", 0); 37 | //批处理数据的大小,每次写入多少数据到topic 38 | properties.put("batch.size", 2); 39 | //可以延长多久发送数据 40 | properties.put("linger.ms", 1); 41 | properties.put("partitioner.class", "com.tools.kafka.CustomPartitioner"); 42 | //缓冲区的大小 43 | properties.put("buffer.memory", 33554432); 44 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); 45 | return properties; 46 | } 47 | 48 | public static void main(String[] args) throws InterruptedException { 49 | Properties properties = initProperties(); 50 | 51 | KafkaProducer producer = new KafkaProducer<>(properties); 52 | Random random = new Random(10); 53 | for (int i = 0; i < 1000000; i++) { 54 | String value = "消息内容:"+i+"-----"+random.nextInt(10000); 55 | ProducerRecord record = new ProducerRecord<>(topic, value); 56 | producer.send(record); 57 | log.info("已发送:{}条, value为: {}", i, value); 58 | // Thread.sleep(1000); 59 | } 60 | producer.close(); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/quickstart/ConsumerQuickStart.java: -------------------------------------------------------------------------------- 1 | package com.tools.kafka.quickstart; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.kafka.clients.consumer.ConsumerConfig; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | 8 | import java.time.Duration; 9 | import java.util.Collections; 10 | import java.util.Properties; 11 | 12 | @Slf4j 13 | public class ConsumerQuickStart { 14 | public static final String brokerList = "localhost:9092"; 15 | public static final String topic = "ProducerQuickStart"; 16 | public static final String groupId = "group-1"; 17 | public static final String out = "topic={} - partition={} - offset={} - value={}"; 18 | 19 | /** 20 | * 初始化配置 21 | * 22 | * @return 23 | */ 24 | private static Properties initProperties() { 25 | Properties properties = new Properties(); 26 | 27 | // 28 | properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, 
"org.apache.kafka.common.serialization.StringDeserializer"); 29 | properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer"); 30 | properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); 31 | properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId); 32 | return properties; 33 | } 34 | 35 | 36 | public static void main(String[] args) { 37 | 38 | KafkaConsumer consumer = new KafkaConsumer<>(initProperties()); 39 | 40 | consumer.subscribe(Collections.singletonList(topic)); 41 | 42 | while (true) { 43 | ConsumerRecords records = consumer.poll(Duration.ofMillis(1000)); 44 | records.forEach(record -> 45 | log.info(out, 46 | record.topic(), 47 | record.partition(), 48 | record.offset(), 49 | record.value())); 50 | } 51 | } 52 | 53 | } 54 | 55 | -------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/quickstart/ProducerQuickStart.java: -------------------------------------------------------------------------------- 1 | package com.tools.kafka.quickstart; 2 | 3 | import org.apache.kafka.clients.producer.KafkaProducer; 4 | import org.apache.kafka.clients.producer.ProducerConfig; 5 | import org.apache.kafka.clients.producer.ProducerRecord; 6 | import org.apache.kafka.common.serialization.StringSerializer; 7 | 8 | import java.util.Properties; 9 | 10 | /** 11 | * kafka生产者。 12 | * 13 | * 主要三大组件: 14 | * - 序列化器 -> 必须配置 15 | * - 分区器 -> 选配 16 | * - 生产者拦截器 -> 选配 17 | * 18 | * 全部配置的情况下执行顺序为 生产者拦截器 -> 序列化器 -> 分区器 19 | */ 20 | public class ProducerQuickStart { 21 | public static final String brokerList = "localhost:9092"; 22 | public static final String topic = "ProducerQuickStart"; 23 | 24 | /** 25 | * 初始化参数 26 | * 27 | * @return 28 | */ 29 | private static Properties initProperties() { 30 | Properties properties = new Properties(); 31 | 32 | //生产者需要序列化器将对象转换成字节数组才能通过网络发送给kafka服务端 33 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 34 | // properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 35 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 36 | 37 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); 38 | 39 | return properties; 40 | } 41 | 42 | public static void main(String[] args) throws InterruptedException { 43 | 44 | //构建producer实例。 45 | KafkaProducer producer = new KafkaProducer<>(initProperties()); 46 | 47 | for (int i = 100; i < 1000; i++) { 48 | 49 | //构建消息实例ProducerRecord 50 | ProducerRecord record = new ProducerRecord<>(topic, "hello kafka-" + i); 51 | 52 | //消息发送 53 | producer.send(record); 54 | 55 | Thread.sleep(5000); 56 | } 57 | 58 | 59 | producer.close(); 60 | } 61 | 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/main/java/com/tools/kafka/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sev7e0/bigdata-practice/ffbdd93bd555fd388d4dd20ccc3379124a3eae5f/src/main/java/com/tools/kafka/readme.md -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/CacheTuning.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis; 2 | 3 | import com.google.common.hash.BloomFilter; 4 | import com.google.common.hash.Funnels; 5 | import redis.clients.jedis.Jedis; 6 | 7 | import 
java.time.Duration; 8 | import java.time.LocalTime; 9 | import java.time.temporal.ChronoField; 10 | import java.util.HashMap; 11 | import java.util.UUID; 12 | import java.util.concurrent.ExecutorService; 13 | import java.util.concurrent.Executors; 14 | import java.util.concurrent.atomic.AtomicReference; 15 | 16 | /** 17 | * 应对缓存穿透、缓存击穿、缓存雪崩的几种方案 18 | */ 19 | public class CacheTuning { 20 | 21 | private static final Jedis jedis = new Jedis(); 22 | private static final HashMap map = new HashMap<>(); 23 | private static final ExecutorService threadPool = Executors.newFixedThreadPool(10); 24 | 25 | public static void main(String[] args) { 26 | // System.out.println(jedis.ping()); 27 | map.put("1", "a"); 28 | map.put("2", "b"); 29 | map.put("3", "b"); 30 | 31 | // for (int i = 0; i < 1; i++) { 32 | // threadPool.execute(() -> map.keySet().forEach(key -> System.out.printf(get(key)))); 33 | // } 34 | // System.out.println(safeUpdateCache("3")); 35 | 36 | bloomFilter(); 37 | } 38 | 39 | /** 40 | * 缓存穿透 41 | * 42 | * 缓存穿透指的是查询一个不存在数据,由于缓存中一定不存在,那么每一次请求都会被打到db层,这样就会造成db挂掉的问题。 43 | */ 44 | /** 45 | * 1.使用bloom filter进行拦截 46 | *

47 | * 有很多种方法可以有效地解决缓存穿透问题,最常见的则是采用布隆过滤器,将所有可能存在的数据哈希到一个足够大的bitmap中,一个一定不存在的数据会被这个bitmap拦截掉, 48 | * 从而避免了对底层存储系统的查询压力。另外也有一个更为简单粗暴的方法(我们采用的就是这种),如果一个查询返回的数据为空(不管是数据不存在,还是系统故障), 49 | * 我们仍然把这个空结果进行缓存,但它的过期时间会很短,最长不超过五分钟。 50 |

51 | * 2. 无论db返回什么都进行缓存,但如果缓存的为空值,那么可以设置他的过期时间较短,比如五分钟 52 | */ 53 | private static void bloomFilter() { 54 | int size = 1000000; 55 | BloomFilter bloomFilter = BloomFilter.create(Funnels.stringFunnel(), size); 56 | 57 | for (int i = 0; i < size; i++) { 58 | bloomFilter.put(String.valueOf(i)); 59 | } 60 | LocalTime before = LocalTime.now(); 61 | if (bloomFilter.mightContain(String.valueOf(-1))) { 62 | System.out.println("mightContain"); 63 | } 64 | LocalTime now = LocalTime.now(); 65 | System.out.println(Duration.between(before, now).getNano()); 66 | 67 | } 68 | 69 | 70 | /** 71 | * 缓存击穿 72 | * 缓存在某个时间点过期的时候,恰好在这个时间点对这个Key有大量的并发请求过来, 73 | * 这些请求发现缓存过期一般都会从后端DB加载数据并回设到缓存,这个时候大并发的请求可能会瞬间把后端DB压垮。 74 | * 75 | * 与缓存穿透不同点在于,其是key实在db中存在的,不过某一时刻过期了导致不能够被获取到,请求就又转发到了db中。 76 | * 77 | * 与缓存雪崩不同的是,雪崩是大面积的key同时失效。 78 | * 79 | * 80 | * 1.使用分布式互斥锁的方式 解决缓存中找不到对应值的问题 81 | *

82 | * 简单说就是当检测到一个key失效时,对其使用分布式锁。用一个获取到锁的线程去加载数据,其他线程等待加载完成,锁被 83 | * 解除后才能够继续获取。 84 |

85 | * 2.不由redis控制过期时间,由程序维护,无论有没有查询到数据都直接返回。在获得的数据时若发现超时,则由程序发起异步线程进行缓存更新。 86 | * 优点是不会产生死锁,缺点数据一致性较低 87 | */ 88 | private static String updateCache(String key) { 89 | String stop = "stop"; 90 | String value = jedis.get(key); 91 | if (value == null) { 92 | if (jedis.setnx(stop, "1") == 1) { 93 | System.out.println("已获取到锁,正在更新缓存"); 94 | jedis.expire(stop, 3 * 60); 95 | value = dbGet(key); 96 | jedis.set(key, value); 97 | System.out.println("缓存更新完成!!!"); 98 | jedis.del(stop); 99 | } else { 100 | try { 101 | Thread.sleep(50); 102 | } catch (InterruptedException e) { 103 | e.printStackTrace(); 104 | } 105 | System.out.println("当前已被加锁,准备重试"); 106 | value = updateCache(key); 107 | } 108 | 109 | } 110 | return value; 111 | } 112 | 113 | /** 114 | * 使用更好的锁方式实现,不推荐上边的加锁方式,存在线程不安全的问题 115 | * 116 | * @param key 117 | * @return 118 | */ 119 | private static String safeUpdateCache(String key) { 120 | String stop = "stop"; 121 | String lockId = UUID.randomUUID().toString(); 122 | String value = jedis.get(key); 123 | if (value == null) { 124 | //redis分布式锁 125 | if (DistributedTool.acquireDistributedLock(jedis, stop, lockId, Long.valueOf(180))) { 126 | System.out.println("已获取到锁,正在更新缓存"); 127 | value = dbGet(key); 128 | jedis.set(key, value); 129 | System.out.println("缓存更新完成!!!"); 130 | DistributedTool.releaseDistributedLock(jedis, stop, lockId); 131 | } else { 132 | try { 133 | Thread.sleep(50); 134 | } catch (InterruptedException e) { 135 | e.printStackTrace(); 136 | } 137 | System.out.println("当前已被加锁,准备重试"); 138 | value = safeUpdateCache(key); 139 | } 140 | 141 | } 142 | return value; 143 | } 144 | 145 | /** 146 | * 2. 该种方式就是模拟当获取不到值时,使用一个新的线程进行数据库值进行更新,问题是 147 | * 这种方式问题是数据一致性较低,当第一次获取时永远时返回为空。 148 | */ 149 | private static String getByTimeOut(String key) { 150 | String stop = "stop"; 151 | AtomicReference value = new AtomicReference<>(jedis.get(key)); 152 | String[] split = value.get().split("."); 153 | String relValue = split[0]; 154 | Long timeout = Long.valueOf(split[1]); 155 | if (timeout < LocalTime.now().getLong(ChronoField.NANO_OF_DAY)) { 156 | threadPool.execute(() -> { 157 | if (jedis.setnx(stop, "1") == 1) { 158 | System.out.println("已获取到锁,正在更新缓存"); 159 | jedis.expire(stop, 3 * 60); 160 | value.set(dbGet(key)); 161 | jedis.set(key, value.get()); 162 | System.out.println("缓存更新完成!!!"); 163 | jedis.del(stop); 164 | } 165 | }); 166 | } 167 | return value.get(); 168 | } 169 | 170 | private static String dbGet(String key) { 171 | try { 172 | Thread.sleep(3000); 173 | } catch (InterruptedException e) { 174 | e.printStackTrace(); 175 | } 176 | return map.get(key); 177 | } 178 | 179 | /** 180 | * 缓存雪崩 181 | * 182 | * 缓存雪崩是指在我们设置缓存时采用了相同的过期时间,导致缓存在某一时刻同时失效,请求全部转发到DB,DB瞬时压力过重雪崩。 183 | * 184 | * 185 | * 1.使用加锁或者队列的单线程方式,保证在有大量的数据失效时,不会有大量的并发请求发送到DB,导致压力过大 186 | * 187 | * 188 | * 2.在原有的过期时间上 随机添加一些时间,由于过期时间不同,就能减轻在过期时产生的压力 189 | */ 190 | } 191 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/DistributedTool.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | import redis.clients.jedis.Jedis; 6 | 7 | import java.util.Collections; 8 | 9 | /** 10 | * 基于redis实现的分布式锁,当前只是针对单节点模式。 11 | *

12 | * redis集群情况下可以考虑使用redisson。 13 | */ 14 | public class DistributedTool { 15 | 16 | private static final Logger logger = LoggerFactory.getLogger(DistributedTool.class); 17 | 18 | private static final String LOCK_STATUS = "OK"; 19 | /** 20 | * 当key不存在时进行当前操作,若存在则不操作 21 | */ 22 | private static final String SET_IF_NOT_EXIST = "NX"; 23 | /** 24 | * 设定key的超时时间 25 | */ 26 | private static final String SET_WITH_EXPIRE_TIME = "PX"; 27 | 28 | /** 29 | * @param jedis redis客户端 30 | * @param key 使用key来当作锁,保证唯一性 31 | * @param lockId 要保证每次加锁和解锁是一个来自客户端,只用一个key是无法保证的,这里使用value值作为一次完整加锁解锁请求id来保证。 32 | * @param time 超时时间,设定了超时时间后,即使持有锁的客户端发生崩溃,key也会因为过期而自动删除,从而释放锁。 33 | * @return 加锁状态 34 | */ 35 | public static Boolean acquireDistributedLock(Jedis jedis, String key, String lockId, Long time) { 36 | String status = jedis.set(key, lockId, SET_IF_NOT_EXIST, SET_WITH_EXPIRE_TIME, time); 37 | if (LOCK_STATUS.equals(status)) { 38 | logger.info("获取锁成功,当前:" + key + "-" + lockId); 39 | return true; 40 | } 41 | return false; 42 | } 43 | 44 | 45 | /** 46 | * @param jedis redis客户端 47 | * @param key 锁 48 | * @param lockId 锁对应的请求id 49 | * @return 释放锁结果 50 | *

51 | * 使用lua脚本是为了保证操作的原子性,redisson中使用同样的方式进行锁释放。 52 | *

53 | * 为什么锁redis使用lua是线程安全的? 54 | * 因为redis本身就是单线程的,而redis内置了lua的解析器,从而能保证线程安全(不够严谨) 55 | */ 56 | public static Boolean releaseDistributedLock(Jedis jedis, String key, String lockId) { 57 | //lua脚本 58 | String luaScript = "if redis.call('get', KEYS[1]) == ARGV[1] then return redis.call('del', KEYS[1]) else return 0 end"; 59 | //调用evel交给redis服务端执行脚本 60 | Object status = jedis.eval(luaScript, Collections.singletonList(key), Collections.singletonList(lockId)); 61 | if (LOCK_STATUS.equals(status)) { 62 | logger.info("释放锁成功,当前:" + key + "-" + lockId); 63 | return true; 64 | } 65 | return false; 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/lettuce/LettuceTools.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis.lettuce; 2 | 3 | public enum LettuceTools { 4 | 5 | URL("redis://localhost:6379/0"); 6 | 7 | private String value; 8 | 9 | LettuceTools(String value){ 10 | this.value = value; 11 | } 12 | 13 | public String getValue() { 14 | return value; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/lettuce/MyListener.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis.lettuce; 2 | 3 | import io.lettuce.core.pubsub.RedisPubSubListener; 4 | 5 | public class MyListener implements RedisPubSubListener { 6 | @Override 7 | public void message(Object channel, Object message) { 8 | 9 | } 10 | 11 | @Override 12 | public void message(Object pattern, Object channel, Object message) { 13 | 14 | } 15 | 16 | @Override 17 | public void subscribed(Object channel, long count) { 18 | 19 | } 20 | 21 | @Override 22 | public void psubscribed(Object pattern, long count) { 23 | 24 | } 25 | 26 | @Override 27 | public void unsubscribed(Object channel, long count) { 28 | 29 | } 30 | 31 | @Override 32 | public void punsubscribed(Object pattern, long count) { 33 | 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/lettuce/PubSubByLettuce.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis.lettuce; 2 | 3 | import io.lettuce.core.RedisClient; 4 | import io.lettuce.core.RedisFuture; 5 | import io.lettuce.core.cluster.RedisClusterClient; 6 | import io.lettuce.core.cluster.pubsub.StatefulRedisClusterPubSubConnection; 7 | import io.lettuce.core.cluster.pubsub.api.async.RedisClusterPubSubAsyncCommands; 8 | import io.lettuce.core.cluster.pubsub.api.sync.RedisClusterPubSubCommands; 9 | import io.lettuce.core.pubsub.StatefulRedisPubSubConnection; 10 | import io.lettuce.core.pubsub.api.async.RedisPubSubAsyncCommands; 11 | import io.lettuce.core.pubsub.api.reactive.RedisPubSubReactiveCommands; 12 | import io.lettuce.core.pubsub.api.sync.RedisPubSubCommands; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | /** 17 | * 18 | */ 19 | public class PubSubByLettuce { 20 | 21 | private static Logger logger = LoggerFactory.getLogger(PubSubByLettuce.class); 22 | 23 | public static void main(String[] args) { 24 | RedisClient redisClient = RedisClient.create(LettuceTools.URL.getValue()); 25 | 26 | /** 27 | * 同步订阅 synchronous subscription 28 | */ 29 | StatefulRedisPubSubConnection pubSub = redisClient.connectPubSub(); 30 | 31 | pubSub.addListener(new MyListener()); 32 | 33 | 
RedisPubSubCommands sync = pubSub.sync(); 34 | 35 | sync.subscribe("channel"); 36 | 37 | 38 | /** 39 | * 异步订阅 asynchronous subscription 40 | */ 41 | StatefulRedisPubSubConnection pubSub1 = redisClient.connectPubSub(); 42 | 43 | pubSub1.addListener(new MyListener()); 44 | 45 | RedisPubSubAsyncCommands async = pubSub1.async(); 46 | 47 | //异步将会返回future 48 | RedisFuture future = async.subscribe("channel"); 49 | 50 | future.whenComplete((s,th)->{ 51 | if (th instanceof Exception){ 52 | logger.info(th.getMessage()); 53 | } 54 | }); 55 | 56 | 57 | /** 58 | * 使用reactive订阅 59 | */ 60 | 61 | StatefulRedisPubSubConnection pubSub2 = redisClient.connectPubSub(); 62 | 63 | RedisPubSubReactiveCommands reactive = pubSub2.reactive(); 64 | 65 | reactive.subscribe("channel").subscribe(); 66 | 67 | //将会接收到所有进来的消息,可以进行过滤操作,observe会在取消订阅时停止。 68 | reactive.observeChannels().doOnNext(message-> logger.info(message.getMessage())).subscribe(); 69 | 70 | 71 | /** 72 | * 在redis集群中使用订阅功能 73 | * 74 | * 在redis集群中可以是用订阅,但有几点需要注意: 75 | * 76 | * 用户在集群的一个节点上发布消息,集群会自动向所有节点广播,不论这台机器是否订阅了 77 | * 这个channel,这也就表示在集群中订阅消息时,不需要连接指定的消息发布的节点,任意 78 | * 任意一个节点都可以。 79 | */ 80 | 81 | 82 | RedisClusterClient clusterClient = RedisClusterClient.create(LettuceTools.URL.getValue()); 83 | 84 | StatefulRedisClusterPubSubConnection connection = clusterClient.connectPubSub(); 85 | 86 | //同步 87 | RedisClusterPubSubCommands sync1 = connection.sync(); 88 | 89 | sync1.subscribe("channel"); 90 | 91 | //异步 92 | RedisClusterPubSubAsyncCommands async1 = connection.async(); 93 | 94 | async1.subscribe("channel"); 95 | 96 | 97 | 98 | 99 | StatefulRedisClusterPubSubConnection connection0 = clusterClient.connectPubSub(); 100 | connection0.addListener(new MyListener()); 101 | connection0.setNodeMessagePropagation(true); 102 | RedisClusterPubSubCommands sync2 = connection0.sync(); 103 | sync2.masters().commands().subscribe("__keyspace@0__:*"); 104 | 105 | 106 | /** 107 | * 注意事项 108 | * 109 | * 复制到副本节点的键,特别是考虑到到期,会在保存该键的所有节点上生成键空间事件。如果一个密钥过期并被复制,它将在主副 110 | * 本和所有副本上过期。每个redis服务器都会发出keyspace事件。因此,订阅非主节点将使您的应用程序看到同一个密钥的同一类 111 | * 型的多个事件,因为redis是分布式的。 112 | * 113 | * 订阅可以通过使用nodeselection api或对单个集群节点连接调用subscribe(…)来发出。订阅注册不会传播到拓扑更改时添 114 | * 加的新节点。 115 | */ 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/lettuce/QuickStartByLettuce.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis.lettuce; 2 | 3 | import io.lettuce.core.LettuceFutures; 4 | import io.lettuce.core.RedisClient; 5 | import io.lettuce.core.RedisFuture; 6 | import io.lettuce.core.api.StatefulRedisConnection; 7 | import io.lettuce.core.api.async.RedisAsyncCommands; 8 | import io.lettuce.core.api.sync.RedisCommands; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.util.concurrent.ExecutionException; 13 | import java.util.concurrent.TimeUnit; 14 | import java.util.concurrent.TimeoutException; 15 | import java.util.stream.IntStream; 16 | 17 | public class QuickStartByLettuce { 18 | private static final Logger logger = LoggerFactory.getLogger(QuickStartByLettuce.class); 19 | 20 | private final static String URI = "redis://localhost:6379/0"; 21 | 22 | public static void main(String[] args) throws InterruptedException { 23 | 24 | /** 25 | * 同步方式 26 | */ 27 | 28 | logger.info("使用同步API执行命令"); 29 | //创建RedisClient实例 30 | RedisClient redisClient = RedisClient.create(URI); 31 | 32 | //创建redis连接 33 | StatefulRedisConnection 
connect = redisClient.connect(); 34 | 35 | //获取用于同步执行的命令API。lettuce也支持异步(async())和反应式执行模型(reactive())。 36 | //返回RedisCommands 37 | RedisCommands redisCommands = connect.sync(); 38 | 39 | //可直接使用redis的command 40 | redisCommands.set("test", "value"); 41 | 42 | logger.info(redisCommands.get("test")); 43 | 44 | 45 | /** 46 | * 需要手动关闭连接,连接默认设计为长连接且线程安全 47 | * 当前链接失效时会自动重连,一直到close()被调用 48 | */ 49 | connect.close(); 50 | 51 | 52 | /** 53 | * 异步方式 54 | */ 55 | 56 | logger.info("使用异步API执行命令"); 57 | StatefulRedisConnection aconnect = redisClient.connect(); 58 | 59 | RedisAsyncCommands asyncCommands = aconnect.async(); 60 | 61 | asyncCommands.set("async", "command"); 62 | 63 | //在lettuce中使用异步API执行command将会返回RedisFuture, 64 | //他是继承自CompletionStage,可以取消(cancel()),也可以查询执行状态(isDone(),isCancelled()) 65 | RedisFuture async = asyncCommands.get("async"); 66 | 67 | try { 68 | 69 | //可以从RedisFuture获取到返回的结果。 70 | logger.info(async.get()); 71 | 72 | //将会等待10秒,再去获取RedisFuture返回的值 73 | //超时将会抛出TimeoutException 74 | logger.info(async.get(5, TimeUnit.SECONDS)); 75 | 76 | //有结果返回时将会调用。 77 | async.thenAccept(s -> logger.info(s)); 78 | 79 | //有结果返回后,使用异步线程执行 80 | async.thenAcceptAsync(s -> logger.info("返回后使用异步线程执行")); 81 | 82 | } catch (InterruptedException e) { 83 | logger.error(e.getMessage()); 84 | } catch (ExecutionException e) { 85 | logger.error(e.getMessage()); 86 | } catch (TimeoutException e) { 87 | logger.error(e.getMessage()); 88 | } 89 | 90 | try { 91 | Thread.sleep(5000); 92 | } catch (InterruptedException e) { 93 | e.printStackTrace(); 94 | } 95 | 96 | 97 | /** 98 | * 同步使用future,暂未完成 99 | */ 100 | 101 | logger.info("代码不会等到某个命令完成后再发出另一个命令。同步是在发出所有命令之后完成的。"); 102 | LettuceFutures.awaitAll(1, TimeUnit.MINUTES, IntStream.range(0, 10).mapToObj(i -> asyncCommands.set("key-" + i, "value-" + i)).toArray(RedisFuture[]::new)); 103 | 104 | 105 | logger.info("对单个futur也可以使用await"); 106 | RedisFuture future = asyncCommands.get("key-0"); 107 | if (!future.await(1, TimeUnit.MINUTES)) { 108 | System.out.println("在超时时间内未完成!"); 109 | } 110 | 111 | 112 | logger.info("还有一种使用阻塞future的是采用循环的方式"); 113 | RedisFuture future1 = asyncCommands.get("key-1"); 114 | while (!future1.isDone()) { 115 | logger.info("当前查询任务还未完成,继续阻塞"); 116 | } 117 | 118 | 119 | /** 120 | * 错误处理 121 | * 122 | * 1.返回默认值 123 | * 2.使用备用的future 124 | * 3.重试future 125 | */ 126 | 127 | //可以使用handle函数在出现异常时返回默认值 128 | future1.handle((s, throwable) -> { 129 | if (throwable != null) { 130 | return "default value"; 131 | } 132 | return s; 133 | }).thenAccept(s -> logger.info("获取到的value为:{}", s)); 134 | 135 | 136 | //future支持可以根据不同的返回异常的类型,使用不同的默认值 137 | future1.exceptionally(throwable -> { 138 | if (throwable instanceof IllegalStateException) { 139 | return "IllegalStateException"; 140 | } else if (throwable instanceof ExecutionException) { 141 | return "ExecutionException"; 142 | } 143 | return "default value"; 144 | }).thenAccept(s -> logger.info("当前返回值为:{}", s)); 145 | 146 | 147 | // 148 | future1.whenComplete((s, throwable) -> { 149 | if (throwable instanceof IllegalStateException) { 150 | logger.error("异常为:{}", throwable.getMessage()); 151 | } 152 | }).thenAccept(s -> logger.info("当前value:{}", s)); 153 | 154 | 155 | //关闭实例,释放线程和资源。 156 | redisClient.shutdown(); 157 | } 158 | 159 | 160 | } 161 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/lettuce/TransactionsByLettuce.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis.lettuce; 2 | 3 
| import io.lettuce.core.KeyValue; 4 | import io.lettuce.core.RedisClient; 5 | import io.lettuce.core.RedisFuture; 6 | import io.lettuce.core.TransactionResult; 7 | import io.lettuce.core.api.async.RedisAsyncCommands; 8 | import io.lettuce.core.api.reactive.RedisReactiveCommands; 9 | import io.lettuce.core.api.sync.RedisCommands; 10 | import lombok.extern.slf4j.Slf4j; 11 | 12 | import java.util.List; 13 | import java.util.concurrent.ExecutionException; 14 | 15 | @Slf4j 16 | public class TransactionsByLettuce { 17 | 18 | public static void main(String[] args) throws ExecutionException, InterruptedException { 19 | RedisClient client = RedisClient.create(LettuceTools.URL.getValue()); 20 | 21 | 22 | /** 23 | * 24 | * Transactions using the asynchronous API 25 | * 26 | * 与非事务方式接近,同样是返回RedisFuture,可以对这个返回的future使用与 27 | * 非事务方式同样的操作 28 | */ 29 | RedisAsyncCommands async = client.connect().async(); 30 | 31 | async.multi(); 32 | 33 | async.set("key3", "value3"); 34 | 35 | RedisFuture set = async.get("key5"); 36 | 37 | RedisFuture future = async.exec(); 38 | 39 | TransactionResult objects = future.get(); 40 | log.info("第一次返回为{}, 第二次返回为{}", set.get(), objects.get(1)); 41 | if (objects.get(0) == set.get()) { 42 | log.info("结果相同"); 43 | } 44 | 45 | 46 | /** 47 | * Transactions using the reactive API 48 | * 49 | * 使用react api可以在一步执行多个命令 50 | * 51 | * 以下代码启动事务,在事务中执行两个命令,最后执行事务 52 | */ 53 | 54 | RedisReactiveCommands reactive = client.connect().reactive(); 55 | 56 | reactive.multi().subscribe(multiResponse -> { 57 | reactive.set("key", "1").subscribe(); 58 | reactive.incr("key").subscribe(); 59 | reactive.exec().subscribe(); 60 | }); 61 | 62 | /** 63 | * Transactions on clustered connections 64 | * 65 | * 默认情况下,集群会自动路由,意味着你不能确定你的命令是在 66 | * 那一台节点上执行的,所以当执行在集群环境时,使用普通的事务 67 | * 命令即可。 68 | */ 69 | 70 | RedisCommands redis = client.connect().sync(); 71 | redis.multi(); 72 | redis.set("one", "1"); 73 | redis.set("two", "2"); 74 | redis.mget("one", "two"); 75 | redis.llen("key"); 76 | 77 | redis.exec(); // result: list("OK", "OK", list("1", "2"), 0L) 78 | 79 | 80 | /** 81 | * Mult executing multiple asynchronous commands 82 | */ 83 | RedisAsyncCommands async1 = client.connect().async(); 84 | async1.multi(); 85 | RedisFuture set1 = async1.set("one", "1"); 86 | RedisFuture set2 = async1.set("two", "2"); 87 | RedisFuture>> mget = async1.mget("one", "two"); 88 | RedisFuture llen = async1.llen("key"); 89 | 90 | set1.thenAccept(value -> log.info(value)); // OK 91 | 92 | RedisFuture exec = async1.exec(); // result: list("OK", "OK", list("1", "2"), 0L) 93 | exec.thenAccept(value -> log.info(value.get(0))); 94 | 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/redisson/RedissonDelayQueue.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis.redisson; 2 | 3 | import org.redisson.Redisson; 4 | import org.redisson.api.RBlockingQueue; 5 | import org.redisson.api.RDelayedQueue; 6 | import org.redisson.api.RedissonClient; 7 | import org.redisson.config.Config; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import java.util.concurrent.TimeUnit; 12 | 13 | /** 14 | * Title: RedissonDelayQueue.java 15 | * description: TODO 16 | * 17 | * @author sev7e0 18 | * @version 1.0 19 | * @since 2021-01-25 11:56 20 | **/ 21 | 22 | public class RedissonDelayQueue { 23 | 24 | static final Logger logger = LoggerFactory.getLogger(RedissonDelayQueue.class); 25 | 26 | public 
static void main(String[] args) throws InterruptedException { 27 | final Config config = new Config(); 28 | config.useSingleServer().setAddress("redis://localhost:6379/0"); 29 | final RedissonClient redissonClient = Redisson.create(config); 30 | 31 | 32 | for (int i = 0; i < 100; i++) { 33 | final RBlockingQueue blockingQueue = redissonClient.getBlockingQueue("delay_queue"); 34 | 35 | final RDelayedQueue delayedQueue = redissonClient.getDelayedQueue(blockingQueue); 36 | final String s = "obj test666:" + i; 37 | delayedQueue.offer(s, 100-i, TimeUnit.SECONDS); 38 | logger.info("消息入队,内容:{},时间 :{}", s, i); 39 | delayedQueue.destroy(); 40 | } 41 | 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/redisson/RedissonDelayQueueConsumer.java: -------------------------------------------------------------------------------- 1 | package com.tools.redis.redisson; 2 | 3 | import org.redisson.Redisson; 4 | import org.redisson.api.RBlockingQueue; 5 | import org.redisson.api.RDelayedQueue; 6 | import org.redisson.api.RedissonClient; 7 | import org.redisson.config.Config; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import java.util.Objects; 12 | import java.util.concurrent.TimeUnit; 13 | 14 | /** 15 | * Title: RedissonDelayQueue.java 16 | * description: TODO 17 | * 18 | * @author sev7e0 19 | * @version 1.0 20 | * @since 2021-01-25 11:56 21 | **/ 22 | 23 | public class RedissonDelayQueueConsumer { 24 | 25 | static final Logger logger = LoggerFactory.getLogger(RedissonDelayQueueConsumer.class); 26 | 27 | public static void main(String[] args) throws InterruptedException { 28 | final Config config = new Config(); 29 | config.useSingleServer().setAddress("redis://localhost:6379/0"); 30 | final RedissonClient redissonClient = Redisson.create(config); 31 | 32 | final RBlockingQueue blockingQueue = redissonClient.getBlockingQueue("delay_queue"); 33 | 34 | while (true) { 35 | final String poll = blockingQueue.poll(2, TimeUnit.SECONDS); 36 | if (Objects.isNull(poll)) { 37 | continue; 38 | } 39 | logger.info("消息出队队,内容:{}", poll); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/redis持久化详解.md: -------------------------------------------------------------------------------- 1 | # redis持久化详解 2 | 3 | 在redis中提供了两种持久化方式RDB(快照)和AOF,不过两种各有利弊,接下来详细说一下。 4 | 5 | ## RDB持久化方式 6 | 7 | ![20190820213301571.png](https://files.sev7e0.site/images/oneblog/20190820213301571.png) 8 | 9 | RBD持久化是redis的默认策略。 10 | 11 | redis会生成一个二进制文件,默认情况下dump.rdb会叫这个名字,但这个文件是可以被redis还原成文件生成时redis的状态。 12 | 13 | redis支持两个命令生成RDB文件,`SAVE`和`BGSAVE`,主要区别在一前者为阻塞方式生成文件。 14 | 后者则是以子进程(fork)的方式进行生成文件,也就以为这不会产生阻塞,父进程可以继续响应请求,这里主要介绍`BGSAVE`该种方式,应为第一种方式将会阻塞,在大量数据的情况下,服务将尝试建不可用。 15 | 16 | #### fork 17 | 18 | Linux 提供的一种进程机制,当前进程调用 fork 将会产生一个子进程,该子进程会与当前进程共享一块内存,也就是数据段和代码段都是相同的。 19 | 20 | 子进程在在得到了内存后,开始疯狂的写入文件做持久化,此时若当前进程接受到新的请求,进行数据更改时,将会把共享的内存段复制一份,当前进程会基于新的内存数据进行操作。 21 | 这样一来,我们就不用再去担心当前进程对即将产生的快照产生影响了。 22 | 23 | 24 | ** 若在BGSAVE执行期间,手动再次调用`SAVE`、`BGSAVE`或`BGREWRITEAOF`命令会产生两次备份么?** 25 | 26 | redis规定在备份命令执行期间,再次执行以上命令则不会被执行,为了防止产生竞争同时执行两个`rdbSave`,`SAVE`、`BGSAVE`两个命令不会执行。 27 | 而`BGREWRITEAOF`命令会在,前一个命令执行完成后开始执行。 28 | 29 | 但如果是`BGREWRITEAOF`命令正在执行,此时客户端发送`BGSAVE`命令则会被拒绝。 30 | 31 | 具体原因是两个命令都是由子进程执行的,所以在操作方面没有冲突的地方,不能同时执行的原因是性能上的考虑——并发出两个子进程,并且这两个子进程都会同时执行大量 io(磁盘写入)操作。 32 | 33 | ## AOF持久化方式 34 | 35 | 
![20190820213255264.jpg](https://files.sev7e0.site/images/oneblog/20190820213255264.jpg) 36 | 37 | AOF 持久化在 Redis 中默认为关闭,他类似于 Mysql 的 Binlog,是一个用来记录 Redis 所有操作命令的日志,若在 redis 启动时就开启了AOF,那么他将记录所有的操作命令,在进行恢复时只需要进行 AOF 的命令重放即可。 38 | 39 | 正因为他记录了所有的操作命令,所以他也存在一些问题: 40 | - 随着 AOF 记录的操作命令越来越多导致生成的文件很大。 41 | - 由于生成的文件大,导致在进行命令重放的时候时间较长。 42 | 43 | redis 中 AOF 的写操作是在逻辑处理之后,导致 redis 无法进行数据的回滚,这也是与 MySQL 的 binlog不同的一点。 44 | 45 | bgrewriteaof:针对上述的问题,Redis 在 2.4 之后也使用了 bgrewriteaof 对 AOF 日志进行瘦身。 46 | 47 | bgrewriteaof 命令用于异步执行一个 AOF 文件重写操作。重写会创建一个当前 AOF 文件的体积优化版本。 48 | 49 | ## RDB && AOF 混合持久化方式 50 | 51 | 由于两种方式都存在一些问题,使用 RDB 会导致在写入时丢失新写入的数据,而使用 AOF 会存在持久化文件过大导致恢复时停机时间较长的问题。 52 | 53 | 所以在Redis4.0之后引入了新的持久化机制,将 RDB 与 AOF 进行结合,使用 RDB 策略进行持久化,同时将这段时间内的操作使用 AOF 进行记录,这样既能够快速的生成文件,同时 AOF 不在需要进行全量的操作记录,只需要保存前一次 RDB 开始后的增量 AOF 即可,这样生成的 AOF 持久化文件将不会再过大。 -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/redis的五大数据类型.md: -------------------------------------------------------------------------------- 1 | ## redis五种数据类型 2 | 3 | ### 字符串(string) 4 | 5 | **字符串有哪些常用的命令?** 6 | 7 | | op | 注释 | 8 | |---|---| 9 | | APPEND | 将value值追加到给定的key当前对应的value的末尾 | 10 | | GETRANGE | 获取一个给定范围内的字符串 | 11 | | SETRANGE | 将指定的位置开始设定为给定值 | 12 | | GETBIT | 将字符串看成二进制串,返回偏移量在子串中所对应的值 | 13 | | SETBIT | 将字符串看成二进制串,设定偏移量对应子串中的位置为给定值 | 14 | | BITCOUNT | 统计二进制子串中位置为1的数量,可选择指定的区间 | 15 | | BITSTOP | 可以对二进制子串进行逻辑运算,并将结果保存到新的key-value中 | 16 | 17 | **实现原理** 18 | 19 | 底层由redis实现的简单动态字符串(SDS)实现的 20 | 21 | 相较于c语言的字符串的有几个优点: 22 | - 获取字符串的长度的时间复杂度为O(1) 23 | - API安全不会造成缓冲区溢出 24 | - 修改字符串时最多需要n次内存分配 25 | - 可以同时保存文本和二进制数 26 | - 可以使用c中原来的库函数 27 | 28 | 支持存储三种类型的值: 29 | - 字符串 30 | - 整数 31 | - 浮点数 32 | 33 | **redis在字符串方面有哪些不同于其他数据库?** 34 | 35 | 很多键值数据库只能将数据存储为普通的字符串,并且不提供字符串处理操作,有一些数据库虽 36 | 然支持简单的追加,但却不可以像redis一样对字符串的子串进行读写(GETRANGE)。 37 | 38 | **redis的字符串是如何保存整数和浮点数的?** 39 | 40 | redis字符串底层使用的也是字符串数组,所以在保存时可以使用整数和浮点数,并且他会自动 41 | 识别出你保存的是整数还是浮点数,还是字符串,如果是浮点数整数,他将会支持使用自增或 42 | 自减等操作。 43 | 44 | ### 列表(list) 45 | 46 | **列表的常用命令** 47 | 48 | | op | 注释 | 49 | |---|---| 50 | | RPUSH | 在存储在键的列表尾部插入所有指定的值。如果键不存在,则在执行推送操作之前将其创建为空列表。 | 51 | | LPUSH | 在存储在键的列表头部插入所有指定的值。如果键不存在,则在执行推送操作之前将其创建为空列表。 | 52 | | LPOP | 从列表的头部开始推出,非阻塞的 | 53 | | RPOP | 从列表的头部开始推出,非阻塞的 | 54 | | RPOPLPUSH | 原子地返回并删除存储在源位置的列表的最后一个元素(尾),并将该元素推送到存储在目标位置的列表的第一个元素(头) | 55 | | LINSERT | 尾部插入 | 56 | | LINDEX | 获取到对应索引的value值,和普通链表一样,索引从0开始 | 57 | | BLPOP、BRPOP |一个阻塞列表pop。lpop和rpop的阻塞版本,因为当没有元素从任何给定的列表中弹出时,它会阻塞连接。按照给定的顺序检查给定的键。| 58 | | BRPOPLPUSH | 阻塞的RPOPLPUSH实现,当列表中不为空时,其功能与RPOPLPUSH完全一样,但当为空时则会阻塞,直到其他的客户端将数据放入进来(可使用无限期阻塞),或者超时才会返回。 | 59 | | LLEN | 获取当前列表的长度 | 60 | 61 | **~~在redis3.2之前的版本list实现~~** 62 | 63 | 列底层使用了压缩列表和双向链表来实现的,在列表中对象较少时,会使用压缩列表,随着包含 64 | 的对象越来越多时, 65 | 将会逐渐转换为性能等方面更好的更适合处理大量元素的双端链表(关于压缩列表和双端链表可 66 | 查阅《redis的设计与实现》)。 67 | ``` 68 | redis:6379> RPUSH zip a b c de 12 23 45 "dd" 69 | (integer) 8 70 | redis:6379> OBJECT encoding zip 71 | "ziplist" 72 | ``` 73 | 但从redis3.2开始将不会在看当这样的返回。 74 | 75 | **在redis3.2之后的版本list实现** 76 | 77 | 列表底层使用了一种数据结构实现[quick list][1],`quicklist`是一个双向链表,不过这 78 | 个双向链表的节点使用的则是`ziplist`,如果了解过`ziplist`那将会知道,它是一个内存 79 | 紧凑的数据结构,其中的每一个数据项前后相邻,并且能够维持数据项的先后顺序。 80 | 81 | **为什么要使用quicklist这种数据结构** 82 | 83 | - 双向链表便于在表的两端进行push和pop操作,但是它的内存开销比较大。首先,它在每个节点 84 | 上除了要保存数据之外,还要额外保存两个指针;其次,双向链表的各个节点是单独的内存块, 85 | 地址不连续,节点多了容易产生内存碎片。 86 | - ziplist由于是一整块连续内存,所以存储效率很高。但是,它不利于修改操作,每次数据变动 87 | 都会引发一次内存的realloc。特别是当ziplist长度很长的时候,一次realloc可能会导致大 88 | 批量的数据拷贝,进一步降低性能。 89 | 90 | **一个quicklist节点包含多长的ziplist才能在空间和时间上达到最优?** 91 | 92 
| - 每个quicklist节点上的ziplist越短,则内存碎片越多。内存碎片多了,有可能在内存中产 93 | 生很多无法被利用的小碎片,从而降低存储效率。这种情况的极端是每个quicklist节点上的 94 | ziplist只包含一个数据项,这就蜕化成一个普通的双向链表了。 95 | - 每个quicklist节点上的ziplist越长,则为ziplist分配大块连续内存空间的难度就越大。 96 | 有可能出现内存里有很多小块的空闲空间(它们加起来很多),但却找不到一块足够大的空闲空间 97 | 分配给ziplist的情况。这同样会降低存储效率。这种情况的极端是整个quicklist只有一个节点, 98 | 所有的数据项都分配在这仅有的一个节点的ziplist里面。这其实蜕化成一个ziplist了。 99 | 100 | 由此可见,每个quicklist节点的ziplist要保持多长,这可能要等到具体的使用场景才能够决定。 101 | `list-max-ziplist-size`可以进行ziplist的size配置。当列表很长时,可以使用 102 | `list-compress-depth`进行中间段压缩。 103 | 104 | ### 散列(hash) 105 | 106 | **常用命令** 107 | 108 | `OP hash field value` 109 | 110 | | op | 注释 | 111 | |---|---| 112 | | HSET | 设置哈希表的值不存在则创建,存在覆盖,不存在时写入成功返回1,存在时覆盖成功时返回0 113 | | HSETNX | 当哈希表的filed不存在时创建,field存在时放弃操作,当hash表不存在时则创建hash表,再次执行命令HSETNX,成功创建filed返回1,放弃返回0 114 | | HGET | 根据给定的哈希表和给定的filed查询出value值 115 | | HEXISTS | 检查给定的filed是否存在于哈希表中 116 | | HDEL | 删除指定filed 117 | | HLEN | 哈希表的长度,就是filed的数量 118 | | HSTRLEN | 返回哈希表 key 中, 与给定域 field 相关联的值的字符串长度(string length)。 119 | | HINCRBY | filed值自增 120 | | HMSET | 同HSET,支持多个value 121 | | HMGET | 同HGET,支持多个value 122 | | HKEYS | 获取指定filed的所有key 123 | | HVALS | 获取指定filed的所有value 124 | | HGETALL | 同时获取filed下所有的key-value 125 | 126 | **实现方式** 127 | 128 | 散列的底层提供了两种实现方式,`ziplist`和`hashtable`,在一定条件下两种方式会发生相 129 | 互转换,当散列表较小时,默认使用的时`ziplist`,当一个filed存储过多的key-value时会 130 | 转而使用`hashtable`。 131 | 132 | **ziplist如何实现hash** 133 | 134 | ziplist使用entry保存每一对键值对,当有新的加入进来时,key会先放到压缩列表的的尾部,然后再 135 | 将value放到尾部,保证每一个key和value时紧挨着的,这样先放入的键值对会存在压缩列表的头部, 136 | 后方进来的会保持在尾部。 137 | 138 | **hashtable如何实现的hash** 139 | 140 | hashtable实现hash使用的时字典进行保存键值对,字典的键保存键值对的键,值保存键值对的 141 | 值。字典中的键和值都是用字符串对象。 142 | 143 | **什么情况下会使他们发生转换** 144 | 145 | - hash对象中保存的键值对的键和值的字符串长度都小于64 146 | - hash对象中保存的键值对少于512个 147 | 148 | 满足以上两点键会使用ziplist,反之将会转化为hashtable,不过量值不是固定的,可以通过 149 | 配置文件进行修改,`hash-max-ziplist-value` and `hash-max-ziplist-entries`。 150 | 151 | ### 集合(Set) 152 | 153 | **常用命令** 154 | 155 | `OP key member [member ...]` 156 | 157 | | op | 注释 | 158 | |---|---| 159 | |SADD|向集合中添加一个元素,member已经存在则会被忽略,key不存在将会被创建 160 | |SISMEMBER|判断member是否为集合中的成员,是返回1其他情况返回0 161 | |SPOP|随机移除一个元素 162 | |SRANDMEMBER|只提供 key 参数时,返回随机一个元素;如果集合为空,返回nil,如果提供了count参数,那么返回一个数组;如果集合为空,返回空数组。 163 | |SREM|移除一个或者多个元素 164 | |SMOVE|原子性操作,移动原集合中的member到目标集合中,目标中存在这是单纯的将member移除。 165 | |SCARD|返回集合中的数量 166 | |SMEMBERS|返回结合中的所有成员 167 | |SINTER|返回一个集合的全部成员,该集合是所有给定集合的交集。不存在的 key 被视为空集。 168 | |SUNION|返回一个集合的全部成员,该集合是所有给定集合的并集。 169 | |SDIFF|返回一个集合的全部成员,该集合是所有给定集合之间的差集。 170 | |SDIFFSTORE|与SDIFF相识,但它将结果保存到 destination 集合,而不是简单地返回结果集。 171 | 172 | **实现方式** 173 | 174 | 集合的底层实现是由intset和hashtable实现,使用整数集合时所有元素都被放在集合里面, 175 | 使用hashtable时,将会被保存在字典中,字典的key将会保存每一个元素,字典的value值将会被 176 | 置null。 177 | 178 | **何时发生转换** 179 | 180 | - 当集合中所有元素都是整数时 181 | - 当集合中保存的数量超过512时 182 | 183 | 同时满足以上两点那么redis将会使用intset保存集合中的元素。同样这个是可配置的使用用 184 | `set-max-intset-entries`进行配置。 185 | 186 | ### 有序集合(sort set) 187 | 188 | **常用命令** 189 | 190 | | op | 注释 | 191 | |---|---| 192 | |ZADD | 将member及其score放入到有序key的集合中,如果某个 member 已经是有序集的成员,那么更新这个 member 的 score 值,并通过重新插入这个 member 元素,来保证该 member 在正确的位置上。 193 | |ZSCORE | 返回有序集 key 中,成员 member 的 score 值。 194 | |ZCARD | 返回有序集 key 的基数。 195 | |ZCOUNT | 返回有序集 key 中, score 值在 min 和 max 之间(默认包括 score 值等于 min 或 max )的成员的数量。 196 | |ZRANGE | 返回有序集 key 中,指定区间内的成员。 197 | |ZREVRANGE | 返回有序集 key 中,指定区间内的成员,逆序排列。 198 | |ZRANGEBYSCORE | 返回有序集 key 中,指定区间内的成员。 199 | |ZREVRANGEBYSCORE | 返回有序集 key 中,所有 score 值介于 min 和 max 之间(包括等于 min 或 max )的成员。有序集成员按 score 值逆序。 200 | |ZRANK | 返回有序集 key 中成员 member 的排名。其中有序集成员按 
score 值递增(从小到大)顺序排列。 201 | |ZREVRANK | 返回有序集 key 中成员 member 的排名。其中有序集成员按 score 值逆序。 202 | |ZREM | 移除有序集 key 中的一个或多个成员,不存在的成员将被忽略。key存在但不是有序集合时将会报错 203 | |ZREMRANGEBYRANK | 移除有序集 key 中范围内的成员,不存在的成员将被忽略。按照rank的排序 204 | |ZREMRANGEBYSCORE | 移除有序集 key 中的范围内的成员,不存在的成员将被忽略。按照score排序 205 | |ZUNIONSTORE | 交集,并将结果存储到新的有序集合中 206 | 207 | **实现方式** 208 | 209 | 有序集合内部由压缩列表、字典和[跳表][2]实现的。 210 | - 在ziplist实现中,每个集合元素使用两个紧挨在一起的压缩列表实现,第一个节点保存元素的 211 | 成员,第二个保存元素的score,内部按照score的大小进行排列,score大的放在靠近表尾,小 212 | 的放在表头。 213 | - 在skiplist的实现中,使用zset作为地层结构,每个zset包含了一个字典和一个跳表。在跳表中 214 | 节点的object属性保存了元素的成员,而跳表的score属性保存了有序集合元素的score。在字典中 215 | 每个字典的key将会保存元素,value将会用来保存score,这样就创建了一个元素到score的映射 216 | ,加快`zscore`的速度。虽然在redis的有序集合skiplist实现中同时使用了两种数据结构, 217 | 不过两种结构时对象项共享的,也就是锁元素的String对象和score的float对象都是被共享的, 218 | 所以不会产生内存浪费这种现象。 219 | 220 | **为什么要同时使用两种数据结构实现有序集合?** 221 | 222 | 源码中的注释大概意思就是,为了效率。并且明确的指出了两种数据结构使用的是共享SDS,也就 223 | 是说redis在管理一个字符串时另一个也会被影响。至于为什么使用两种,可以这样理解,若单独使用 224 | 字典来实现,那以O(1)的时间获取指定元素的score将会被保持,但是`ZRANGE`使用这样的范围型 225 | 操作时,由于字典无序,那么也就是说每次获取前都要进行排序,至少需要O(log(n))。同样若单 226 | 独使用跳表实现,那么每次查找元素对应的score将会花费O(log(n))。所以为了让有序集合的查找 227 | 和范围操作快速执行,redis使用了两种数据结构。 228 | 229 | **何时发生转换** 230 | 231 | - 当有序集合中元素数量小于128 232 | - 当有序集合中每个元素的长度小于64 233 | 234 | 同时满足以上两点那么redis将会使用ziplist保存集合中的元素。反之使用skiplist同样这个是可配置的使用用 235 | `zset-max-ziplist-entries`和`zset-max-ziplist-value`进行配置。 236 | 237 | [1]:http://zhangtielei.com/posts/blog-redis-quicklist.html 238 | [2]:http://zhangtielei.com/posts/blog-redis-skiplist.html -------------------------------------------------------------------------------- /src/main/java/com/tools/redis/分布式数据库与缓存双写一致性方案.md: -------------------------------------------------------------------------------- 1 | 2 | ### 分布式数据库与缓存双写一致性方案 3 | 4 | #### ~~先更新数据库,在更新缓存~~ 5 | -------------- 6 | 7 | 该方案不推荐使用,主要原因有两点: 8 | 1. 原因一:线程安全问题 9 | 若同时有AB两个线程进行更新,则会出现数据不一致的问题 10 | 1. A更新了数据库 11 | 2. B更新了数据库 12 | 3. B更新了缓存 13 | 4. A更新了缓存 14 | 2. 原因二:业务场景 15 | 1. 若数据库的写操作比较多,读操作较少的话,会导致缓存频繁更新,浪费性能。 16 | 2. 若写操作并不是直接将数据写入缓存,有其他计算操作的话,那需要每次更新数据库都要重新计算。 17 | 18 | #### ~~先删除缓存,再更新数据库~~ 19 | ------------------ 20 | 21 | 该方案在多线程的条件下同样会存在数据不一致的问题 22 | 1. 请求A进行写操作,删除缓存; 23 | 2. 请求B查询发现缓存不存在; 24 | 3. 请求B去数据库查询得到旧值; 25 | 4. 请求B将旧值写入缓存; 26 | 5. 请求A将新值写入数据库。 27 | 该方案如果不设置超时时间的话,那么在下一次写操作前,缓存中都为脏数据。 28 | 29 | 这里可以采用延迟两遍删除的策略来保证。 30 | ```jshelllanguage 31 | public void write(String key,Object data){ 32 | //第一次先删除 33 | redis.delKey(key); 34 | //开始写数据 35 | db.updateData(data); 36 | //等待一秒 37 | Thread.sleep(1000); 38 | //再次删除缓存 39 | redis.delKey(key); 40 | } 41 | //使用该方案需要考虑延迟的时间问题,要结合自己的写操作逻辑 42 | ``` 43 | **如果你用了MySQL的读写分离架构怎么办?** 44 | 45 | 在这种情况下,造成数据不一致的原因如下,还是两个请求,一个请求A进行更新操作,另一个请求B进行查询操作。 46 | 47 | 1. 请求A进行写操作,删除缓存; 48 | 49 | 2. 请求A将数据写入数据库了; 50 | 51 | 3. 请求B查询缓存发现,缓存没有值; 52 | 53 | 4. 请求B去从库查询,这时,还没有完成主从同步,因此查询到的是旧值; 54 | 55 | 5. 请求B将旧值写入缓存; 56 | 57 | 数据库完成主从同步,从库变为新值。 58 | 59 | 上述情形,就是数据不一致的原因。还是使用双删延时策略。只是,睡眠时间修改为在主从同步的延时时间基础上,加几百ms。 60 | 61 | **采用这种同步淘汰策略,吞吐量降低怎么办?** 62 | 63 | 那就将第二次删除作为异步的。自己起一个线程,异步删除。这样,写的请求就不用沉睡一段时间再返回。这么做,加大吞吐量。 64 | 65 | **第二次删除,如果删除失败怎么办?** 66 | 67 | 这是个非常好的问题,因为第二次删除失败,就会出现如下情形。还是有两个请求,一个请求A进行更新操作,另一个请求B进行查询操作,为了方便,假设是单库: 68 | 69 | 1. 请求A进行写操作,删除缓存; 70 | 71 | 2. 请求B查询发现缓存不存在; 72 | 73 | 3. 请求B去数据库查询得到旧值; 74 | 75 | 4. 请求B将旧值写入缓存; 76 | 77 | 5. 
请求A将新值写入数据库; 78 | 79 | 请求A试图去删除请求B写入对缓存值,结果失败了。 80 | 81 | 这也就是说,如果第二次删除缓存失败,会再次出现缓存和数据库不一致的问题。 82 | 83 | #### 先更新数据库,再删除缓存(推荐) 84 | --------------------- 85 | 86 | 在微软的一篇文章「cache aside pattern」中指出一种更新策略 87 | 88 | - 失效场景:先从缓存中获取数据,没有得到,从数据库中获取,成功后,加入缓存。 89 | - 命中场景:命中缓存中的数据,返回 90 | - 更新场景:先把数据保存到数据库中,成功后删除缓存,或让缓存失效 91 | **并发问题** 92 | 缓存刚好失效; 93 | 94 | 请求A查询数据库,得一个旧值; 95 | 96 | 请求B将新值写入数据库; 97 | 98 | 请求B删除缓存; 99 | 100 | 请求A将查到的旧值写入缓存。 101 | 此时确实会产生数据不一致的问题,但一般场景下,数据写入会更慢于查询。 102 | 103 | 解决方案就是->给缓存设定失效时间,或者使用延迟删除的策略 104 | 105 | **删除缓存失败怎么办** 106 | 答:**重试机制** 107 | 108 | - 使用消息队列来维护重试列表,每次失败时,我们可以将失败的key保存到消息队列中,一段时间后在进行重试,缺点是会对原有的业务代码造成侵入。 109 | - 第二种方案是,监控binlog,每当有操作时,将消息发送到另一各系统中,这样就不会对原有的业务代码造成侵入,问题是维护两套系统,不过该方案可以考虑使用已有的binlog工具。 110 | 111 | 参考:[分布式数据库与缓存双写一致性方案解疑][1] 112 | 113 | [1]:https://mp.weixin.qq.com/s/ICABpJJkeaFoOO0qeAa2cA -------------------------------------------------------------------------------- /src/main/java/com/tools/zookeeper/discovery/client/DistributeClient.java: -------------------------------------------------------------------------------- 1 | package com.tools.zookeeper.discovery.client; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.zookeeper.ZooKeeper; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.concurrent.CountDownLatch; 9 | 10 | @Slf4j 11 | public class DistributeClient { 12 | 13 | private static final String connectString = "localhost:2181"; 14 | 15 | private static final Integer sessionTimeout = 2000; 16 | 17 | private static final String parentNode = "/Servers"; 18 | 19 | private ZooKeeper zk = null; 20 | 21 | private volatile List serverList = null; 22 | 23 | private static CountDownLatch countDownLatch = new CountDownLatch(1); 24 | 25 | /** 26 | * 异步或者zookeeper链接,注意要使用CountDownLatch阻塞 27 | * 28 | * @throws Exception 29 | */ 30 | public void getConnect() throws Exception { 31 | zk = new ZooKeeper(connectString, sessionTimeout, event -> { 32 | try { 33 | log.info("链接成功,准备获取信息"); 34 | countDownLatch.countDown(); 35 | getServerList(); 36 | } catch (Exception e) { 37 | log.error("获取信息失败!{}", e.getMessage()); 38 | } 39 | }); 40 | countDownLatch.await(); 41 | } 42 | 43 | 44 | /** 45 | * 获取服务列表 46 | * 47 | * @throws Exception 48 | */ 49 | public void getServerList() throws Exception { 50 | /** 51 | * 读取数据,可以获取到节点列表和节点数据, 52 | */ 53 | // List children = zk.getChildren(parentNode, true); 54 | List children = zk.getChildren(parentNode, event -> { 55 | try { 56 | /** 57 | * 支持自定义Watch,在节点变更时会发送NodeChildrenChanged事件 58 | * 不过Watch仅一次有效 59 | */ 60 | log.debug(event.getType().toString()); 61 | log.debug(event.getState().toString()); 62 | log.info("此刻有节点变更事件产生!"); 63 | getServerList(); 64 | } catch (Exception e) { 65 | log.error("注册的Watch调用失败。"); 66 | } 67 | }); 68 | 69 | List list = new ArrayList<>(); 70 | 71 | for (String child : children) { 72 | /** 73 | * 可以根据路径,获取节点中保存的数据,同样getChildren支持Watch注册 74 | * 在节点数据发生变化时,可以发送事件。NodeDataChanged 75 | */ 76 | log.info("服务节点路径为:{}",child); 77 | byte[] data = zk.getData(parentNode + "/" + child, false, null); 78 | list.add(new String(data)); 79 | } 80 | serverList = list; 81 | handlerService(); 82 | } 83 | 84 | /** 85 | * 打印服务列表 86 | */ 87 | public void handlerService() { 88 | if (serverList.size() < 1) { 89 | log.info("当前无可用节点"); 90 | return; 91 | } 92 | serverList.forEach(server -> log.info("当前在线服务有:{}", server)); 93 | } 94 | 95 | public static void main(String[] args) throws Exception { 96 | DistributeClient client = new DistributeClient(); 97 | client.getConnect(); 
98 | Thread.sleep(Long.MAX_VALUE); 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /src/main/java/com/tools/zookeeper/discovery/server/DistributeServer.java: -------------------------------------------------------------------------------- 1 | package com.tools.zookeeper.discovery.server; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.zookeeper.CreateMode; 5 | import org.apache.zookeeper.ZooDefs.Ids; 6 | import org.apache.zookeeper.ZooKeeper; 7 | 8 | import java.util.concurrent.CountDownLatch; 9 | 10 | @Slf4j 11 | public class DistributeServer { 12 | 13 | //zookeeper连接地址 14 | private static final String connectString = "localhost:2181"; 15 | //超时时间 用于zookeeper判断当前节点等待心跳最长时间 16 | private static final Integer sessionTimeout = 2000; 17 | //根节点 ------ 启动前需要手动创建 18 | private static final String parentNode = "/Servers"; 19 | 20 | private ZooKeeper zk = null; 21 | 22 | private static CountDownLatch countDownLatch = new CountDownLatch(1); 23 | 24 | 25 | /** 26 | * 创建链接 27 | * 28 | * @throws Exception 29 | */ 30 | public void getConnect() throws Exception { 31 | zk = new ZooKeeper(connectString, sessionTimeout, event -> { 32 | log.info("链接状态更改----{}", event.getState()); 33 | countDownLatch.countDown(); 34 | }); 35 | /** 36 | * 由于创建是异步的可能会导致链接未创建就执行 37 | * 所以这里使用CountDownLatch进行阻塞 38 | * 39 | * 在链接创建后使用Watch进行解除阻塞。 40 | */ 41 | countDownLatch.await(); 42 | //根据两个能够确定一个会话,可以实现客户端会话复用 43 | log.info("sessionId为:{}", zk.getSessionId()); 44 | log.info("会话密钥为:{}", zk.getSessionPasswd()); 45 | 46 | 47 | } 48 | 49 | /** 50 | * @param @param hostName 51 | * @param @throws Exception 52 | * @param @throws InterruptedException 53 | * @return void 54 | * @throws 55 | * @Title: regServer 56 | * @Description: 向zookeeper注册服务 57 | */ 58 | public void regServer(String hostName) throws Exception { 59 | //支持异步创建,不支持递归创建,即不存在父节点的情况下不可以创建 60 | String creatPath = zk.create(parentNode + "/server", hostName.getBytes(), Ids.OPEN_ACL_UNSAFE, 61 | CreateMode.EPHEMERAL); 62 | log.info("{}-------- is on line-----{}", hostName, creatPath); 63 | } 64 | 65 | /** 66 | * @param hostName 67 | */ 68 | public void handleService(String hostName) { 69 | log.info("{} start working", hostName); 70 | } 71 | 72 | public static void main(String[] args) throws Exception { 73 | 74 | DistributeServer distributeServer = new DistributeServer(); 75 | 76 | distributeServer.getConnect(); 77 | 78 | distributeServer.regServer(args[0]); 79 | 80 | distributeServer.handleService(args[0]); 81 | 82 | Thread.sleep(Long.MAX_VALUE); 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/com/tools/zookeeper/discovery/服务注册与发现.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sev7e0/bigdata-practice/ffbdd93bd555fd388d4dd20ccc3379124a3eae5f/src/main/java/com/tools/zookeeper/discovery/服务注册与发现.md -------------------------------------------------------------------------------- /src/main/java/com/tools/zookeeper/election/Broker_1.java: -------------------------------------------------------------------------------- 1 | package com.tools.zookeeper.election; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.curator.framework.CuratorFramework; 5 | 6 | import java.util.concurrent.CountDownLatch; 7 | 8 | @Slf4j 9 | public class Broker_1 { 10 | private static final CountDownLatch shutdownLatch = new CountDownLatch(1); 11 | private static final String name 
= "Broker_1"; 12 | 13 | public static void main(String[] args) throws InterruptedException { 14 | ZkElectionUtil electionUtil = new ZkElectionUtil(); 15 | try { 16 | electionUtil.electionMaster(name.getBytes()); 17 | } catch (Exception e) { 18 | e.printStackTrace(); 19 | } 20 | shutdownLatch.await(); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/tools/zookeeper/election/Broker_2.java: -------------------------------------------------------------------------------- 1 | package com.tools.zookeeper.election; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | 5 | import java.util.concurrent.CountDownLatch; 6 | 7 | @Slf4j 8 | public class Broker_2 { 9 | private static final CountDownLatch shutdownLatch = new CountDownLatch(1); 10 | private static final String name = "Broker_2"; 11 | 12 | public static void main(String[] args) throws InterruptedException { 13 | ZkElectionUtil electionUtil = new ZkElectionUtil(); 14 | try { 15 | electionUtil.electionMaster(name.getBytes()); 16 | } catch (Exception e) { 17 | e.printStackTrace(); 18 | } 19 | shutdownLatch.await(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/tools/zookeeper/election/Broker_3.java: -------------------------------------------------------------------------------- 1 | package com.tools.zookeeper.election; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.curator.framework.CuratorFramework; 5 | 6 | import java.util.concurrent.CountDownLatch; 7 | 8 | @Slf4j 9 | public class Broker_3 { 10 | private static final CountDownLatch shutdownLatch = new CountDownLatch(1); 11 | private static final String name = "Broker_3"; 12 | 13 | public static void main(String[] args) throws InterruptedException { 14 | ZkElectionUtil electionUtil = new ZkElectionUtil(); 15 | try { 16 | electionUtil.electionMaster(name.getBytes()); 17 | } catch (Exception e) { 18 | e.printStackTrace(); 19 | } 20 | shutdownLatch.await(); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/tools/zookeeper/election/ZkElectionUtil.java: -------------------------------------------------------------------------------- 1 | package com.tools.zookeeper.election; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.curator.framework.CuratorFramework; 5 | import org.apache.curator.framework.CuratorFrameworkFactory; 6 | import org.apache.curator.framework.api.CuratorWatcher; 7 | import org.apache.curator.retry.ExponentialBackoffRetry; 8 | import org.apache.zookeeper.CreateMode; 9 | 10 | import java.util.Arrays; 11 | import java.util.Objects; 12 | 13 | 14 | @Slf4j 15 | public class ZkElectionUtil { 16 | private static final String CONNECTSTRING = "localhost:2181"; 17 | private static final int SESSIONTIMEOUT = 2000; 18 | private static final String LOCKNODE = "/rootNode/lock"; 19 | private static final CuratorFramework client; 20 | // 初始化客户端 21 | static { 22 | ExponentialBackoffRetry exponentialBackoffRetry = new ExponentialBackoffRetry(SESSIONTIMEOUT, 3); 23 | client = CuratorFrameworkFactory.newClient(CONNECTSTRING, exponentialBackoffRetry); 24 | client.start(); 25 | } 26 | 27 | /** 28 | * 创建节点 29 | * @param data 30 | * @return 31 | */ 32 | private boolean getLock(byte[] data) { 33 | try { 34 | if (Objects.isNull(client.checkExists().forPath(LOCKNODE))) { 35 | client.create() 36 | .creatingParentContainersIfNeeded() 37 | .withMode(CreateMode.EPHEMERAL) 38 | 
.forPath(LOCKNODE, data); 39 | } else { 40 | return false; 41 | } 42 | } catch (Exception e) { 43 | log.warn("create node path fail, reason: {}", e.getMessage()); 44 | return false; 45 | } 46 | return true; 47 | } 48 | 49 | /** 50 | * 选主 51 | * @param data 52 | * @throws Exception 53 | */ 54 | void electionMaster(byte[] data) throws Exception { 55 | //尝试创建zk临时节点 56 | if (getLock(data)) { 57 | log.info("now you are leader"); 58 | } else { 59 | log.warn("now you are follower, leader was: {}", getLeader()); 60 | client.getData() 61 | // 每次选举失败,重新注册节点监听事件 62 | .usingWatcher((CuratorWatcher) event -> { 63 | log.info("leader node was changed, will start election"); 64 | // 递归调用 65 | electionMaster(data); 66 | }) 67 | .forPath(LOCKNODE); 68 | } 69 | } 70 | 71 | /** 72 | * 获取创建成功的数据 73 | * @return 74 | */ 75 | private String getLeader() { 76 | try { 77 | return Arrays.toString(client.getData().forPath(LOCKNODE)); 78 | } catch (Exception e) { 79 | log.error("get leader error: {}",e.getMessage()); 80 | } 81 | return "no leader"; 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/com/tools/zookeeper/zookeeper选举机制.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sev7e0/bigdata-practice/ffbdd93bd555fd388d4dd20ccc3379124a3eae5f/src/main/java/com/tools/zookeeper/zookeeper选举机制.pdf -------------------------------------------------------------------------------- /src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | --------------------------------------------------------------------------------
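One small readability note on the leader-election demo above: `ZkElectionUtil.getLeader()` formats the lock-node payload with `Arrays.toString(byte[])`, so the log prints numeric byte values rather than the broker name that `Broker_1/2/3` wrote via `name.getBytes()`. The snippet below is a hedged, standalone sketch (class name and connection settings are placeholders, not part of the repository) that reads the same lock node and decodes the payload as UTF-8 text instead.

```java
package com.tools.zookeeper.election;

import java.nio.charset.StandardCharsets;

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.ExponentialBackoffRetry;

/**
 * Hypothetical helper, not in the repo: prints the current leader's name
 * by decoding the payload stored on the ephemeral lock node.
 */
public class LeaderNamePrinter {
    public static void main(String[] args) throws Exception {
        CuratorFramework client = CuratorFrameworkFactory
                .newClient("localhost:2181", new ExponentialBackoffRetry(2000, 3));
        client.start();
        // Broker_x stores name.getBytes() on /rootNode/lock, so decode it back to text
        // instead of Arrays.toString(byte[]), which prints the raw byte values.
        byte[] data = client.getData().forPath("/rootNode/lock");
        System.out.println("current leader: " + new String(data, StandardCharsets.UTF_8));
        client.close();
    }
}
```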