├── .idea
│   ├── checkstyle-idea.xml
│   ├── compiler.xml
│   ├── encodings.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── misc.xml
│   ├── uiDesigner.xml
│   ├── vcs.xml
│   └── workspace.xml
├── README.MD
├── pom.xml
├── run
│   ├── HdfsFileClean.sh
│   ├── HiveTableClean.sh
│   └── ParseFsimageFile2Hive.sh
└── src
    ├── main
    │   ├── java
    │   │   └── com
    │   │       └── xkj
    │   │           └── mlrc
    │   │               ├── clean
    │   │               │   ├── domain
    │   │               │   │   ├── LogBean.java
    │   │               │   │   └── ParamOption.java
    │   │               │   ├── file
    │   │               │   │   └── HdfsFileClean.java
    │   │               │   ├── table
    │   │               │   │   └── HiveTableClean.java
    │   │               │   └── util
    │   │               │       ├── ArgsUtil.java
    │   │               │       ├── DateUtil.java
    │   │               │       ├── HdfsUtils.java
    │   │               │       ├── JdbcHelper.java
    │   │               │       └── PropsUtil.java
    │   │               ├── common
    │   │               │   └── shell
    │   │               │       ├── MyUserInfo.java
    │   │               │       └── Shell.java
    │   │               └── fsimage
    │   │                   ├── GenerateFsimageTable.java
    │   │                   └── GetFromFsImageInfo.java
    │   └── resources
    │       ├── config.properties
    │       ├── core-site.xml
    │       ├── hdfs-site.xml
    │       ├── hive-site.xml
    │       └── log4j.properties
    └── test
        └── java
            └── com
                └── xkj
                    └── mlrc
                        ├── clean
                        │   └── util
                        │       ├── HdfsUtilsTest.java
                        │       └── JdbcHelperTest.java
                        └── fsimage
                            └── GetFromFsImageTest.java
/README.MD:
--------------------------------------------------------------------------------
# data-manager
# The ultimate approach to automated HDFS data cleanup

Source code: https://github.com/lijufeng2016/data-manager

## 1. Background

From the day it is born, an HDFS cluster is battered by all kinds of workloads and users, until it is worn out and under constant pressure. One day you notice that HDFS usage has crossed the 80% alert threshold. Your first reaction is to hunt down the directories that take up the most space and delete them by hand, or to write a scheduled script that cleans a fixed set of directories every day. As the business and the years keep grinding away, a few questions start piling up:

- Even after the biggest files are cleaned, HDFS usage stays high; you keep hunting for directories to clean, only to discover that small files add up to a huge amount of space as well
- Many people share the same cluster, and after a few rounds of turnover HDFS is littered with files left behind by people who have long since left, and nobody knows what to do with them
- There is no clue as to which data can or cannot be cleaned, and what if something is deleted by mistake?
- Plenty of junk files sit on HDFS, but it is hard to tell which ones really are junk or whether anyone still uses them



Given these problems, can we approach HDFS cleanup with a different mindset, without repeatedly editing scripts to point at specific directories, and without agonizing over which directories to clean next? Watch the trick below.

## 2. How it works

This project describes an approach that can clean Hive data as well as non-Hive HDFS data. The basic idea is to parse the Hadoop fsimage file to obtain every file path in HDFS together with each file's last access time; cleaning Hive table data additionally requires the Hive metastore information.

### fsimage

The fsimage is the heart of HDFS: the complete set of HDFS paths is stored in the fsimage file. Whenever we operate on HDFS, whether creating, deleting, updating or reading, Hadoop records an edit log, i.e. a log of HDFS operations. Edit logs are periodically merged into a new fsimage file; in HA mode the fsimage is produced by the standby NameNode, in single-NameNode mode by the secondary NameNode. The fsimage file itself is binary and cannot be read as plain text, so it has to be converted into a readable form such as CSV. Hadoop's built-in `hdfs oiv` command exists precisely for parsing fsimage files. Running `hdfs getconf -confKey dfs.namenode.name.dir` reveals where the fsimage lives; that directory normally keeps two fsimage files named fsimage_xxxxxxxxxxx with a trailing transaction id, and the one with the largest id is the one most recently generated from the merged edit logs.

Run:

```shell
hdfs oiv -p Delimited -delimiter "," -i fsimage_xxxxxxxx -o fsimage.csv
```

Parsing the fsimage produces a CSV file covering every file and directory in HDFS, with the following columns:

- **Path** file or directory path
- Replication replication factor
- ModificationTime last modification time
- **AccessTime** last access time
- PreferredBlockSize preferred block size, in bytes
- BlocksCount number of blocks
- FileSize file size, in bytes
- NSQUOTA name quota, the maximum number of files and directories allowed under the directory
- DSQUOTA space quota, the maximum number of bytes allowed under the directory
- Permission permissions
- UserName owner
- GroupName group

The two columns in bold are the most important. **AccessTime** is the time a file was last accessed, and **it is what lets you determine which files are still in use and which have gone unused for so long that they can be treated as junk or expired data and cleaned up**. AccessTime is only recorded when `dfs.namenode.accesstime.precision` is enabled; it is enabled by default, but HDP clusters ship with it disabled, so make sure to enable it in hdfs-site.xml.

The parsed CSV file is then loaded into a Hive table created with the matching columns, for the cleaning logic that follows.

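To make the idea concrete, here is a minimal sketch (not the project's actual query; the real filtering lives in `GetFromFsImageInfo`, which is not reproduced here) of how overdue paths could be selected from the parsed `fsimage` table, assuming `accesstime` is stored as a sortable `yyyy-MM-dd HH:mm:ss`-style string:

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import com.xkj.mlrc.clean.util.DateUtil;

public class OverduePathsSketch {
    /** Paths whose last access time is older than expireDays days (illustrative only). */
    public static Dataset<Row> overduePaths(SparkSession spark, int expireDays) {
        // cutoff = this moment, expireDays days ago; lexicographic comparison works
        // because the assumed format is zero-padded and ordered from year down to second
        String cutoff = DateUtil.getNDayFmtDAte("yyyy-MM-dd HH:mm:ss", -expireDays);
        return spark.sql("SELECT path, accesstime, filesize FROM fsimage"
                + " WHERE accesstime < '" + cutoff + "'");
    }
}
```
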
### Hive metastore

When Hive is deployed, MySQL is usually chosen as the metastore backend. Hive has many metastore tables that record table names, partitions, locations, parameters and everything else about a table except the data itself. From the metastore we need each table's **HDFS location** and **partitions**; when cleaning Hive data these are joined against the Hive table built from the fsimage above, so that the tables or partitions due for cleaning can be identified.

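For reference, resolving each table's HDFS location boils down to a three-way join over the standard metastore tables. A stripped-down sketch of the query that `HiveTableClean.java` (shown further down) assembles looks roughly like this, assuming the metastore tables have already been mirrored into Spark the way its `loadTableInfoFromMysql()` does:

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class TableLocationSketch {
    /** Database name, table name and HDFS location of every Hive table (illustrative only). */
    public static Dataset<Row> tableLocations(SparkSession spark) {
        return spark.sql("SELECT a.name AS dbname, b.tbl_name, d.location"
                + " FROM dbs a"
                + " JOIN tbls b ON (a.db_id = b.db_id)"
                + " JOIN sds d ON (b.sd_id = d.sd_id)");
    }
}
```
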
## 3. Usage

Source code: https://github.com/lijufeng2016/data-manager

Main classes:

`com.xkj.mlrc.fsimage.GenerateFsimageTable`: parses the fsimage into a CSV file and loads it into Hive

`com.xkj.mlrc.clean.table.HiveTableClean`: the Hive table cleaning logic

`com.xkj.mlrc.clean.file.HdfsFileClean`: the HDFS file/directory cleaning logic, independent of and not conflicting with the Hive cleaning above

args parameter reference:

| Argument | Description |
| --------------- | ------------------------------------------------------------ |
| -targetPath | target paths to clean, comma separated |
| -avoidPath | paths to skip (not scanned), comma separated |
| -avoidSuffix | skip files with these suffixes, comma separated |
| -avoidPrefix | skip files with these prefixes, comma separated |
| -avoidDbs | Hive databases to protect from deletion, including every table and partition in them, comma separated |
| -avoidTbls | Hive tables to protect from deletion, including every partition in them, comma separated |
| -avoidTbls-file | tables to protect from deletion, listed in an HDFS file with one "db.table" entry per line, including every partition in them |
| -expire | retention in days, i.e. clean data older than this many days; this parameter is important and must be greater than 0 |
| -hdfsroot | HDFS root path, e.g. hdfs://bigdatacluster in HA mode or hdfs://xxxx:50070 in single-NameNode mode |

A combination of these flags is parsed into `ParamOption` by `ArgsUtil`, as sketched below.

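A hypothetical combination of these flags, shown only to illustrate how they are parsed (the paths and values are made up):

```java
import com.xkj.mlrc.clean.domain.ParamOption;
import com.xkj.mlrc.clean.util.ArgsUtil;

public class ArgsExample {
    public static void main(String[] ignored) {
        // Clean anything under /tmp/logs not accessed for 7 days,
        // but skip .jar/.xml files and the /tmp/logs/keep subtree.
        String[] args = {
                "-targetPath", "/tmp/logs",
                "-avoidPath", "/tmp/logs/keep",
                "-avoidSuffix", ".jar,.xml",
                "-expire", "7",
                "-hdfsroot", "hdfs://cluster"
        };
        ParamOption option = ArgsUtil.getOption(args);
        System.out.println(option.targetPath + ", expire=" + option.expire);
    }
}
```
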
Necessary preparation:

### Running locally from IDEA

#### Step 1: preparation

**hive-site.xml, core-site.xml and hdfs-site.xml** must be placed under the project's resources directory, otherwise the job will not start. Then adjust every **config.properties** entry for your own environment, as illustrated below.

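The keys read by the code through `PropsUtil.getProp` include at least the following; the values here are placeholders, so substitute your own environment:

```properties
# Hive metastore MySQL connection (used by HiveTableClean)
hive.meta.mysql.url=jdbc:mysql://<mysql-host>:3306/hive
hive.meta.mysql.driver=com.mysql.jdbc.Driver
hive.meta.mysql.username=<user>
hive.meta.mysql.password=<password>

# HiveServer2 JDBC connection (used by JdbcHelper)
hive.jdbc.url=jdbc:hive2://<hiveserver2-host>:10000/default
hive.jdbc.driver=org.apache.hive.jdbc.HiveDriver
hive.jdbc.user=<user>
hive.jdbc.password=<password>

# HDFS root URI (used by HdfsUtils)
ad.hdfs.root.uri=hdfs://cluster
```
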
#### Step 2: parse the fsimage file

Run the main class **com.xkj.mlrc.fsimage.GenerateFsimageTable**. It will SSH into the NameNode, run a series of shell commands, parse the fsimage file and upload the result to HDFS.

#### Step 3: clean the data

Depending on what you need, run

`com.xkj.mlrc.clean.table.HiveTableClean` or `com.xkj.mlrc.clean.file.HdfsFileClean` to clean Hive tables or HDFS data, passing the program arguments you need from the table above.



### Running on YARN

#### Step 1: preparation

Adjust every **config.properties** entry for your environment, build the project with Maven to produce data-manager.jar, and upload the jar to a cluster machine.

#### Step 2: parse the fsimage file

On the NameNode host, run the `ParseFsimageFile2Hive.sh` script from the project's run directory. It runs a series of shell commands, parses the fsimage file and uploads the result to HDFS.

#### Step 3: clean the data

Depending on what you need, run the `HdfsFileClean.sh` or `HiveTableClean.sh` script from the project's run directory to clean Hive tables or HDFS data, configuring the arguments from the table above as needed.

## 4. Conclusion

This approach makes full use of the fsimage file and the Hive metastore. Traditional cleanup requires users to know which directories may or may not be deleted; with this approach you only need to decide how long data may stay unused before it is removed. For example, if a file has not been read for more than 7 consecutive days, it is unlikely to be read later, so the code above can clean it up. The code also deliberately builds in a safety net: with the HDFS Java API a direct delete bypasses the HDFS trash, but this project moves everything into the trash first, and the data is only removed for good when the trash checkpoint expires, so accidentally deleted data can still be recovered in time.

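A minimal sketch of that safety net, calling the `HdfsUtils.trashPath` helper included in this repo (the path is hypothetical):

```java
import com.xkj.mlrc.clean.util.HdfsUtils;

public class TrashExample {
    public static void main(String[] args) throws Exception {
        // Moves the directory into the HDFS trash instead of deleting it outright,
        // so it can still be restored until the trash checkpoint expires.
        boolean moved = HdfsUtils.trashPath("/tmp/demo/overdue_dir");
        System.out.println("moved to trash: " + moved);
    }
}
```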
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.xkj.mlrc</groupId>
    <artifactId>data-manager</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <mysql.version>5.1.38</mysql.version>
        <hadoop.version>2.7.3</hadoop.version>
        <spark.version>2.3.0</spark.version>
    </properties>

    <build>
        <finalName>${project.artifactId}</finalName>
        <resources>
            <resource>
                <directory>src/main/resources</directory>
                <excludes>
                    <exclude>*.xml</exclude>
                </excludes>
            </resource>
            <resource>
                <directory>src/main/resources/</directory>
                <filtering>false</filtering>
            </resource>
            <resource>
                <directory>src/main/java</directory>
                <includes>
                    <include>**/*</include>
                </includes>
                <filtering>false</filtering>
            </resource>
        </resources>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <appendAssemblyId>false</appendAssemblyId>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>org.eclipse.jetty.aggregate</groupId>
                    <artifactId>jetty-all</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.hive</groupId>
                    <artifactId>hive-shims</artifactId>
                </exclusion>
            </exclusions>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.1.10</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.5</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>args4j</groupId>
            <artifactId>args4j</artifactId>
            <version>2.33</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-all</artifactId>
            <version>4.1.17.Final</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.2</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.0</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>2.3.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
</project>
--------------------------------------------------------------------------------
/run/HdfsFileClean.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | source /etc/profile
3 | echo "submit spark job"
4 |
5 | spark-submit --master yarn-cluster \
6 | --conf spark.storage.memoryFraction=0.1 \
7 | --executor-cores 2 \
8 | --num-executors 2 \
9 | --executor-memory 2g \
10 | --driver-memory 2g \
11 | --class com.xkj.mlrc.clean.file.HdfsFileClean \
12 | data-manager.jar \
13 | -avoidSuffix .jar,.xml \
14 | -expire 3 \
15 | -targetPath /user/cxy/userprofile/log,/user//user/xqlm \
16 | -avoidPath /user/bin,/user/spark
17 |
18 |
19 |
20 | rc=$?
21 | if [[ $rc != 0 ]]; then
22 | echo "`date "+%Y-%m-%d %H:%M:%S"` Spark job run failed......"
23 | exit 1
24 | else
25 | echo "`date "+%Y-%m-%d %H:%M:%S"` Spark job run successfully......."
26 | fi
27 |
28 |
--------------------------------------------------------------------------------
/run/HiveTableClean.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | source /etc/profile
3 | echo "submit spark job"
4 | spark-submit --master yarn-cluster \
5 | --conf spark.storage.memoryFraction=0.1 \
6 | --executor-cores 2 \
7 | --num-executors 2 \
8 | --executor-memory 2g \
9 | --driver-memory 2g \
10 | --class com.xkj.mlrc.clean.table.HiveTableClean \
11 | data-manager.jar \
12 | -avoidSuffix .jar,.xml \
13 | -expire 3
14 |
15 |
16 |
17 |
18 |
19 | rc=$?
20 | if [[ $rc != 0 ]]; then
21 | echo "`date "+%Y-%m-%d %H:%M:%S"` Spark job run failed......"
22 | exit 1
23 | else
24 | echo "`date "+%Y-%m-%d %H:%M:%S"` Spark job run successfully......."
25 | fi
26 |
--------------------------------------------------------------------------------
/run/ParseFsimageFile2Hive.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # create the Hive table that will hold the parsed fsimage
3 | hive -S -e "CREATE TABLE IF NOT EXISTS fsimage( \
4 | path string, \
5 | replication int, \
6 | modificationtime string, \
7 | accesstime string, \
8 | preferredblocksize bigint, \
9 | blockscount int, \
10 | filesize bigint, \
11 | nsquota int, \
12 | dsquota int, \
13 | permission string, \
14 | username string, \
15 | groupname string) \
16 | ROW FORMAT DELIMITED \
17 | FIELDS TERMINATED BY ',' \
18 | STORED AS INPUTFORMAT \
19 | 'org.apache.hadoop.mapred.TextInputFormat' \
20 | OUTPUTFORMAT \
21 | 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' \
22 | location '/tmp/fsimage'"
23 |
24 | # parse the fsimage on a NameNode host, preferably the standby NameNode
25 | nn_paths=`hdfs getconf -confKey dfs.namenode.name.dir`
26 | nn_path=${nn_paths##*,}
27 | echo ${nn_path}
28 | # pick the fsimage file with the largest transaction-id suffix, e.g. fsimage_0000000000157279038
29 | fsimage_file=`find ${nn_path}/current -type f -name 'fsimage_*' | grep -v '.md5' | sort -n | tail -n1`
30 | # parse the fsimage file into csv
31 | hdfs oiv -p Delimited -delimiter "," -i ${fsimage_file} -o fsimage.csv
32 | # upload the csv into the Hive table for the later analysis and deletion steps
33 | hadoop fs -put -f fsimage.csv /tmp/fsimage/
34 |
35 |
--------------------------------------------------------------------------------
/src/main/java/com/xkj/mlrc/clean/domain/LogBean.java:
--------------------------------------------------------------------------------
1 | package com.xkj.mlrc.clean.domain;
2 |
3 | /**
4 | * Log record object: one parsed fsimage row
5 | * @author lijf@2345.com
6 | * @date 2020/4/21 14:09
7 | * @desc
8 | */
9 |
10 | public class LogBean {
11 | String path;
12 | Integer replication;
13 | String modificationtime;
14 | String accesstime;
15 | Long preferredblocksize;
16 | Integer blockscount;
17 | Long filesize;
18 | Integer nsquota;
19 | Integer dsquota;
20 | String permission;
21 | String username;
22 | String groupname;
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/com/xkj/mlrc/clean/domain/ParamOption.java:
--------------------------------------------------------------------------------
1 | package com.xkj.mlrc.clean.domain;
2 |
3 | import org.kohsuke.args4j.Option;
4 |
5 | public class ParamOption {
6 |
7 | @Option(name="-targetPath", usage="指定的要删的目标路径")
8 | public String targetPath;
9 | @Option(name="-avoidPath", usage="要避开的路径,不扫描的路径,逗号隔开")
10 | public String avoidPath;
11 | @Option(name="-avoidSuffix", usage="要避开的包含后缀的文件")
12 | public String avoidSuffix;
13 | @Option(name="-avoidPrefix", usage="要避开的包含前缀的文件")
14 | public String avoidPrefix;
15 | @Option(name="-avoidDbs", usage="要避免删除的数据库,包含库下所有的表分区,逗号隔开")
16 | public String avoidDb;
17 | @Option(name="-avoidTbls", usage="用要避免删除的表,包含表下所有的分区,逗号隔开")
18 | public String avoidTbls;
19 | @Option(name="-avoidTbls-file", usage="用要避免删除的表,用hdfs文件存放,必须是“库.表名”的形式,包含表下所有的分区")
20 | public String avoidTblsFile;
21 | @Option(name="-expire", usage="过期的数据",required = true)
22 | public Integer expire;
23 | @Option(name="-hdfsroot", usage="hdfs根路径,默认hdfs://cluster")
24 | public String hdfsroot = "hdfs://cluster";
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/com/xkj/mlrc/clean/file/HdfsFileClean.java:
--------------------------------------------------------------------------------
1 | package com.xkj.mlrc.clean.file;
2 |
3 | import com.xkj.mlrc.clean.domain.ParamOption;
4 | import com.xkj.mlrc.clean.util.ArgsUtil;
5 | import com.xkj.mlrc.clean.util.HdfsUtils;
6 | import com.xkj.mlrc.fsimage.GetFromFsImageInfo;
7 |
8 | import lombok.extern.slf4j.Slf4j;
9 | import org.apache.spark.api.java.function.ForeachFunction;
10 | import org.apache.spark.sql.*;
11 |
12 | import java.io.IOException;
13 |
14 |
15 | /**
16 | * HDFS file cleanup
17 | * @author lijf@2345.com
18 | * @date 2020/4/21 14:06
19 | * @desc
20 | */
21 | @Slf4j
22 | public class HdfsFileClean {
23 | public static void main(String[] args) {
24 | SparkSession spark = getSparkSession();
25 | ParamOption option = ArgsUtil.getOption(args);
26 |
27 | GetFromFsImageInfo fsImageInfo = GetFromFsImageInfo.builder()
28 | .spark(spark)
29 | .avoidPrefix(option.avoidPrefix)
30 | .avoidSuffix(option.avoidSuffix)
31 | .avoidPath(option.avoidPath)
32 | .expire(option.expire)
33 | .targetPath(option.targetPath)
34 | .hdfsroot(option.hdfsroot)
35 | .build();
36 |
37 | Dataset<Row> allFiles = fsImageInfo.getAllFiles();
38 | allFiles.foreach(new ForeachFunction<Row>() {
39 | @Override
40 | public void call(Row row) throws Exception {
41 | String path = row.getAs("path").toString();
42 | try {
43 | HdfsUtils.trashPath(path);
44 | log.info("Deleted path: " + path);
45 | } catch (IOException e) {
46 | log.info("Failed to delete path: " + path);
47 | e.printStackTrace();
48 | }
49 | }
50 | });
51 |
52 |
53 | }
54 | private static SparkSession getSparkSession() {
55 | return SparkSession
56 | .builder()
57 | .master("local[2]")
58 | .appName(HdfsFileClean.class.getSimpleName())
59 | .enableHiveSupport()
60 | .getOrCreate();
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/src/main/java/com/xkj/mlrc/clean/table/HiveTableClean.java:
--------------------------------------------------------------------------------
1 | package com.xkj.mlrc.clean.table;
2 |
3 | import com.xkj.mlrc.clean.domain.ParamOption;
4 | import com.xkj.mlrc.clean.util.ArgsUtil;
5 | import com.xkj.mlrc.clean.util.HdfsUtils;
6 | import com.xkj.mlrc.clean.util.JdbcHelper;
7 | import com.xkj.mlrc.clean.util.PropsUtil;
8 | import com.xkj.mlrc.fsimage.GetFromFsImageInfo;
9 | import lombok.extern.slf4j.Slf4j;
10 | import org.apache.commons.io.FileUtils;
11 | import org.apache.commons.lang3.StringUtils;
12 | import org.apache.spark.api.java.function.FilterFunction;
13 | import org.apache.spark.broadcast.Broadcast;
14 | import org.apache.spark.sql.Dataset;
15 | import org.apache.spark.sql.Row;
16 | import org.apache.spark.sql.SaveMode;
17 | import org.apache.spark.sql.SparkSession;
18 | import scala.Tuple3;
19 | import scala.reflect.ClassManifestFactory;
20 |
21 | import java.io.File;
22 | import java.io.FileNotFoundException;
23 | import java.io.IOException;
24 | import java.io.InputStream;
25 | import java.net.URL;
26 | import java.util.List;
27 | import java.util.Properties;
28 |
29 | /**
30 | * @author lijf@2345.com
31 | * @date 2020/4/21 16:30
32 | * @desc
33 | */
34 | @Slf4j
35 | public class HiveTableClean {
36 | private static SparkSession spark;
37 | private static Properties properties;
38 | private static ParamOption option;
39 | private static volatile Broadcast<List<String>> broadcast = null;
40 |
41 |
42 | public static void main(String[] args) throws IOException {
43 | init(args);
44 | loadTableInfoFromMysql();
45 | loadExpireDataByFsimage();
46 | cleanUnPartitiondTables();
47 | cleanPartitiondTables();
48 | }
49 |
50 | /**
51 | * Load all overdue directories recorded in the fsimage
52 | */
53 | public static void loadExpireDataByFsimage() {
54 | GetFromFsImageInfo fsImage = GetFromFsImageInfo.builder()
55 | .spark(spark)
56 | .avoidPrefix(option.avoidPrefix)
57 | .avoidSuffix(option.avoidSuffix)
58 | .expire(option.expire)
59 | .targetPath(option.targetPath)
60 | .hdfsroot(option.hdfsroot)
61 | .build();
62 | fsImage.getAllShouldDelDirsByLastAccesstime().createOrReplaceTempView("all_overdue_dirs");
63 | }
64 |
65 | /**
66 | * Initialization
67 | *
68 | * @param args command-line arguments
69 | */
70 | private static void init(String[] args) {
71 | spark = getSparkSession();
72 | properties = new Properties();
73 | String url = PropsUtil.getProp("hive.meta.mysql.url");
74 | properties.put("url", url);
75 | properties.put("driver", PropsUtil.getProp("hive.meta.mysql.driver"));
76 | properties.put("user", PropsUtil.getProp("hive.meta.mysql.username"));
77 | properties.put("password", PropsUtil.getProp("hive.meta.mysql.password"));
78 | option = ArgsUtil.getOption(args);
79 | }
80 |
81 | /**
82 | * Clean non-partitioned tables
83 | */
84 | private static void cleanUnPartitiondTables() throws IOException {
85 | //query all hive tables from the metastore
86 | String sqlText = "select a.name as dbname," +
87 | " b.tbl_name," +
88 | " case when isnull(c.pkey_name) then 0 else 1 end as ispartition," +
89 | " d.location " +
90 | " from dbs a " +
91 | " join tbls b on(a.db_id=b.db_id)" +
92 | " left join partition_keys c on(b.tbl_id=c.tbl_id)" +
93 | " join sds d on(b.sd_id=d.sd_id)";
94 | spark.sql(sqlText).createOrReplaceTempView("all_hive_tables");
95 | String distinctLines = "select dbname," +
96 | " tbl_name," +
97 | " ispartition," +
98 | " location " +
99 | " from all_hive_tables " +
100 | " where ispartition=0" +
101 | " group by dbname,tbl_name,ispartition,location";
102 | // deduplicate records
103 | spark.sql(distinctLines).createOrReplaceTempView("all_unpartitiond_tbs");
104 | // all expired tables to delete, obtained by joining against the fsimage paths
105 | String getExpiredTbs = "select a.* from all_unpartitiond_tbs a join all_overdue_dirs b on(a.location=b.hdfs_abs_path)";
106 | Dataset<Row> allUnpartitiondTables = spark.sql(getExpiredTbs);
107 | // filter out the tables that must be excluded
108 | allUnpartitiondTables = filterExclusionTables(allUnpartitiondTables);
109 | // extract database name, table name and location of each non-partitioned table
110 | allUnpartitiondTables
111 | .toJavaRDD()
112 | .map(row -> new Tuple3<>(row.getAs("dbname").toString(), row.getAs("tbl_name").toString(), row.getAs("location").toString()))
113 | .foreachPartition(partition -> {
114 | JdbcHelper jdbcHelper = JdbcHelper.getHiveInstance();
115 | while (partition.hasNext()){
116 | Tuple3<String, String, String> tableLine = partition.next();
117 | String dbname = tableLine._1();
118 | String tblName = tableLine._2();
119 | String location = tableLine._3();
120 | //drop the table
121 | String table = dbname + "." + tblName;
122 | String sqlTextDrop = "drop table if exists " + table;
123 | try {
124 | jdbcHelper.execute(sqlTextDrop);
125 | log.info("Dropped table: " + table);
126 | } catch (Exception e) {
127 | log.info("Failed to drop table: " + table);
128 | e.printStackTrace();
129 | }
130 | //move the location into the trash
131 | try {
132 | HdfsUtils.trashPath(location);
133 | log.info("Deleted path: " + location);
134 | } catch (IOException e) {
135 | if (e instanceof FileNotFoundException) {
136 | log.info("Path already gone, treated as deleted: " + location);
137 | } else {
138 | log.error("Failed to delete path: " + location);
139 | e.printStackTrace();
140 | }
141 | }
142 | }
143 | });
144 | }
145 |
146 | /**
147 | * Filter out the excluded databases and tables
148 | *
149 | * @param tablesDataset tablesDataset
150 | * @return Dataset
151 | * @throws IOException IOException
152 | */
153 | private static Dataset<Row> filterExclusionTables(Dataset<Row> tablesDataset) throws IOException {
154 | String avoidTbls = option.avoidTbls;
155 | String avoidDb = option.avoidDb;
156 | String avoidTblsFile = option.avoidTblsFile;
157 |
158 | if (null != avoidDb) {
159 | tablesDataset = tablesDataset.filter(new FilterFunction<Row>() {
160 | @Override
161 | public boolean call(Row row) throws Exception {
162 | return !avoidDb.equalsIgnoreCase(row.getAs("dbname").toString());
163 | }
164 | });
165 | }
166 |
167 | if (null != avoidTbls) {
168 | String[] tables = avoidTbls.split(",");
169 | for (int i = 0; i < tables.length; i++) {
170 | String table = tables[i];
171 | tablesDataset = tablesDataset.filter(new FilterFunction<Row>() {
172 | @Override
173 | public boolean call(Row row) throws Exception {
174 | String dbname = row.getAs("dbname").toString();
175 | String tblName = row.getAs("tbl_name").toString();
176 | String tableName = dbname + "." + tblName;
177 | return !table.equalsIgnoreCase(tableName);
178 | }
179 | });
180 | }
181 | }
182 | if(null != avoidTblsFile){
183 | List<String> tables = HdfsUtils.readByLine(avoidTblsFile);
184 | broadcast = spark.sparkContext().broadcast(tables, ClassManifestFactory.classType(List.class));
185 | tablesDataset = tablesDataset.filter(new FilterFunction<Row>() {
186 | List<String> exclusionTablesValue = broadcast.value();
187 | @Override
188 | public boolean call(Row row) throws Exception {
189 | String dbname = row.getAs("dbname").toString();
190 | String tblName = row.getAs("tbl_name").toString();
191 | String table = dbname + "." + tblName;
192 | return !exclusionTablesValue.contains(table);
193 | }
194 | });
195 | }
196 | return tablesDataset;
197 | }
198 |
199 | /**
200 | * Clean partitioned tables
201 | */
202 | private static void cleanPartitiondTables() throws IOException {
203 | String allPartitionTbs = "SELECT " +
204 | " a.name as dbname, " +
205 | " b.tbl_name, " +
206 | " c.location, " +
207 | " d.part_name," +
208 | " concat(location,'/',part_name) as part_location " +
209 | " FROM " +
210 | " dbs a " +
211 | " JOIN tbls b ON (a.db_id = b.db_id) " +
212 | " JOIN sds c ON (b.sd_id = c.sd_id) " +
213 | " JOIN partitions d ON (b.tbl_id = d.tbl_id) ";
214 | // register all partitioned tables
215 | spark.sql(allPartitionTbs).createOrReplaceTempView("allPartitionTbs");
216 | String getExpiredParts = "select a.* from allPartitionTbs a join all_overdue_dirs b on(a.part_location=b.hdfs_abs_path)";
217 | Dataset<Row> partitiondTables = spark.sql(getExpiredParts);
218 | partitiondTables = filterExclusionTables(partitiondTables);
219 | partitiondTables.foreachPartition(parttition -> {
220 | JdbcHelper jdbcHelper = JdbcHelper.getHiveInstance();
221 | while (parttition.hasNext()){
222 | Row row = parttition.next();
223 | String dbName = row.getAs("dbname").toString();
224 | String tblName = row.getAs("tbl_name").toString();
225 | String partLocation = row.getAs("part_location").toString();
226 | String partName = row.getAs("part_name").toString();
227 |
228 | // parse out the partition spec
229 | String[] split = partName.split("/");
230 | for (int j = 0; j < split.length; j++) {
231 | String part = split[j];
232 | split[j] = part.replace("=", "='") + "'";
233 | }
234 | String partNameFmt = StringUtils.join(split, ",");
235 | String tableName = dbName + "." + tblName;
236 | String dropPartitionSql = "ALTER TABLE " + tableName + " DROP IF EXISTS PARTITION (" + partNameFmt + ")";
237 | try {
238 | jdbcHelper.execute(dropPartitionSql);
239 | log.info("Dropped partition. table: {}, partition: {}", tableName, partNameFmt);
240 | } catch (Exception e) {
241 | log.info("Failed to drop partition. table: {}, partition: {}", tableName, partNameFmt);
242 | e.printStackTrace();
243 | }
244 | //move the partition location into the trash
245 | try {
246 | HdfsUtils.trashPath(partLocation);
247 | log.info("Deleted partition path. table: {}, partition: {}, path: {}", tableName, partNameFmt, partLocation);
248 | } catch (Exception e) {
249 | if (e instanceof FileNotFoundException) {
250 | log.info("Partition path already gone, treated as deleted. table: {}, partition: {}, path: {}", tableName, partNameFmt, partLocation);
251 | } else {
252 | log.info("Failed to delete partition path. table: {}, partition: {}, path: {}", tableName, partNameFmt, partLocation);
253 | e.printStackTrace();
254 | }
255 | }
256 |
257 | }
258 | });
259 |
260 | }
261 |
262 | /**
263 | * Load all table information from the Hive metastore in MySQL
264 | */
265 | private static void loadTableInfoFromMysql() {
266 | spark.read().jdbc(properties.getProperty("url"), "DBS", properties).write().mode(SaveMode.Overwrite).saveAsTable("dbs");
267 | spark.read().jdbc(properties.getProperty("url"), "PARTITION_KEYS", properties).write().mode(SaveMode.Overwrite).saveAsTable("partition_keys");
268 | spark.read().jdbc(properties.getProperty("url"), "SDS", properties).write().mode(SaveMode.Overwrite).saveAsTable("sds");
269 | spark.read().jdbc(properties.getProperty("url"), "PARTITION_KEY_VALS", properties).write().mode(SaveMode.Overwrite).saveAsTable("partition_key_vals");
270 | spark.read().jdbc(properties.getProperty("url"), "PARTITIONS", properties).write().mode(SaveMode.Overwrite).saveAsTable("partitions");
271 | spark.read().jdbc(properties.getProperty("url"), "TBLS", properties).write().mode(SaveMode.Overwrite).saveAsTable("tbls");
272 |
273 | }
274 |
275 | private static SparkSession getSparkSession() {
276 | return SparkSession
277 | .builder()
278 | .master("local[2]")
279 | .appName(HiveTableClean.class.getSimpleName())
280 | .enableHiveSupport()
281 | .getOrCreate();
282 | }
283 | }
284 |
--------------------------------------------------------------------------------
/src/main/java/com/xkj/mlrc/clean/util/ArgsUtil.java:
--------------------------------------------------------------------------------
1 | package com.xkj.mlrc.clean.util;
2 |
3 | import com.xkj.mlrc.clean.domain.ParamOption;
4 | import org.kohsuke.args4j.CmdLineException;
5 | import org.kohsuke.args4j.CmdLineParser;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | /**
10 | * @Author: lijf@2345.com
11 | * @Date: 2018/8/22 18:33
12 | * @Version: 1.0
13 | */
14 | public class ArgsUtil {
15 | private static Logger logger = LoggerFactory.getLogger(ArgsUtil.class);
16 | static CmdLineParser parser;
17 | private ArgsUtil() {}
18 |
19 | /**
20 | * Parse command-line arguments
21 | * @param args
22 | * @return
23 | */
24 | public static ParamOption getOption(String[] args){
25 | //parse the command-line arguments
26 | ParamOption option = new ParamOption();
27 | parser = new CmdLineParser(option);
28 | try {
29 | parser.parseArgument(args);
30 | } catch (CmdLineException e) {
31 | logger.error(e.toString(),e);
32 | }
33 |
34 | return option;
35 | }
36 |
37 | /**
38 | * Print usage help
39 | */
40 | public static void showHelp(){
41 | parser.printUsage(System.out);
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/com/xkj/mlrc/clean/util/DateUtil.java:
--------------------------------------------------------------------------------
1 | package com.xkj.mlrc.clean.util;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 |
6 | import java.text.ParseException;
7 | import java.text.SimpleDateFormat;
8 | import java.util.*;
9 |
10 | /**
11 | * @author: lijf@2345.com
12 | * @Date: 2018/7/5 13:47
13 | * @Version: 1.0
14 | */
15 | public class DateUtil {
16 |
17 | private DateUtil() { }
18 |
19 | public static final String DATE_FORMAT_MEDIUM = "yyyy-MM-dd";
20 | public static final String DATE_FORMAT_LONG= "yyyy-MM-dd HH:mm:ss";
21 | private static Logger logger = LoggerFactory.getLogger(DateUtil.class);
22 |
23 | /**
24 | * Check whether a time falls within a time range
25 | *
26 | * @param nowTime
27 | * @param beginTime
28 | * @param endTime
29 | * @return
30 | */
31 | public static boolean belongCalendar(Date nowTime, Date beginTime, Date endTime) {
32 | Calendar date = Calendar.getInstance();
33 | date.setTime(nowTime);
34 |
35 | Calendar begin = Calendar.getInstance();
36 | begin.setTime(beginTime);
37 |
38 | Calendar end = Calendar.getInstance();
39 | end.setTime(endTime);
40 |
41 | return (date.after(begin) && date.before(end));
42 | }
43 | /**
44 | * Format the current date
45 | * @param fmt format pattern
46 | * @return
47 | */
48 | public static String getCurrentFormatDate(String fmt) {
49 | String formatDate = "";
50 | try {
51 | SimpleDateFormat format = new SimpleDateFormat(fmt);
52 | Date date = new Date();
53 | formatDate = format.format(date);
54 | }catch (Exception e){
55 | logger.error(e.toString(),e);
56 | }
57 | return formatDate;
58 | }
59 |
60 | /**
61 | * Get midnight (00:00:00) of the day N days before or after today, as yyyy-MM-dd HH:mm:ss
62 | * @param n
63 | * @return
64 | */
65 | public static String getNDayBeforeOrAfterZeroMorning(int n) {
66 | Calendar instance = Calendar.getInstance();
67 | SimpleDateFormat sdfCn = new SimpleDateFormat(DATE_FORMAT_MEDIUM);
68 | instance.add(Calendar.DAY_OF_MONTH, n);
69 | Date parse = instance.getTime();
70 | String nDayBefore = sdfCn.format(parse);
71 |
72 | return nDayBefore+" 00:00:00";
73 | }
74 | /**
75 | * Get yesterday's date string, e.g. 2018-07-30
76 | */
77 | public static final String getYesterDay(){
78 | Date date=new Date();
79 | Calendar calendar = new GregorianCalendar();
80 | calendar.setTime(date);
81 | calendar.add(Calendar.DATE,-1);
82 | date=calendar.getTime();
83 | SimpleDateFormat formatter = new SimpleDateFormat(DATE_FORMAT_MEDIUM);
84 | return formatter.format(date);
85 | }
86 |
87 | /**
88 | * Convert a formatted date string to a Unix timestamp (seconds)
89 | * @param formatDt
90 | * @return
91 | */
92 | public static long parseToTimestamp(String format,String formatDt) {
93 | SimpleDateFormat formatDate = new SimpleDateFormat(format);
94 | Date date = null;
95 | try {
96 | date = formatDate.parse(formatDt);
97 | return date.getTime() / 1000;
98 | } catch (ParseException e) {
99 | logger.error(e.toString(),e);
100 | }
101 | return 0;
102 | }
103 |
104 | /**
105 | * Get the date n days from now (pass a negative n for past dates)
106 | * @param format format pattern
107 | * @param n number of days
108 | * @return
109 | * @throws ParseException
110 | */
111 | public static String getNDayFmtDAte(String format,int n) {
112 | Calendar instance = Calendar.getInstance();
113 | SimpleDateFormat sdfCn = new SimpleDateFormat(format);
114 | instance.add(Calendar.DAY_OF_MONTH, n);
115 | Date parse = instance.getTime();
116 | return sdfCn.format(parse);
117 | }
118 |
119 | /**
120 | * Get the date n days away from the given day
121 | * @param format format pattern
122 | * @param n number of days
123 | * @return
124 | * @throws ParseException
125 | */
126 | public static String getNDayFmtByDay(String format,String day,int n) {
127 | Calendar instance = Calendar.getInstance();
128 | SimpleDateFormat sdfCn = new SimpleDateFormat(format);
129 | try {
130 | Date date = sdfCn.parse(day);
131 | instance.setTime(date);
132 | } catch (ParseException e) {
133 | e.printStackTrace();
134 | }
135 | instance.add(Calendar.DAY_OF_MONTH, n);
136 | Date parse = instance.getTime();
137 | return sdfCn.format(parse);
138 | }
139 |
140 | /**
141 | * Get the date of the day before yesterday
142 | */
143 | public static final String getBeforeYe(){
144 | Date date=new Date();
145 | Calendar calendar = new GregorianCalendar();
146 | calendar.setTime(date);
147 | calendar.add(Calendar.DATE,-2);
148 | date=calendar.getTime();
149 | SimpleDateFormat formatter = new SimpleDateFormat(DATE_FORMAT_MEDIUM);
150 | return formatter.format(date);
151 | }
152 |
153 | /**
154 | * Get the previous N days' dates (excluding today)
155 | */
156 | public static List<String> getNday(Integer num){
157 | if(num==null||num<=0){
158 | return new ArrayList<>();
159 | }
160 | SimpleDateFormat formatter = new SimpleDateFormat(DATE_FORMAT_MEDIUM);
161 | List<String> li = new ArrayList<>();
162 | Date date=new Date();
163 | Calendar calendar = new GregorianCalendar();
164 | for(int i=num;i>=1;i--){
165 | calendar.setTime(date);
166 | calendar.add(Calendar.DATE,-i);
167 | li.add(formatter.format(calendar.getTime()));
168 | }
169 | return li;
170 | }
171 |
172 | /**
173 | * Parse a formatted date string into a Date
174 | * @param fmtDate yyyy-MM-dd HH:mm:ss
175 | * @return date
176 | */
177 | public static Date parse2Date(String fmtDate){
178 | Date date;
179 | SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT_LONG);
180 | try {
181 | date = sdf.parse(fmtDate);
182 | } catch (ParseException e) {
183 | date = new Date();
184 | }
185 | return date;
186 | }
187 |
188 | /**
189 | * Get the number of seconds left in the current day
190 | * @return ...
191 | */
192 | public static int getLeftSecondsToday(){
193 | SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT_LONG);
194 |
195 | String nowStr = dateFormat.format(new Date());
196 |
197 | String patten = " ";
198 | String endStr = nowStr.substring(0,nowStr.indexOf(patten)) + " 23:59:59";
199 | int leftSeconds = 0;
200 | try {
201 | leftSeconds = Integer.valueOf((dateFormat.parse(endStr).getTime() - dateFormat.parse(nowStr).getTime()) / 1000+"");
202 | } catch (ParseException e) {
203 | logger.error(e.toString(),e);
204 | }
205 | return leftSeconds;
206 | }
207 |
208 | /**
209 | * Convert a Unix timestamp to a formatted date string
210 | * @param timestamp
211 | * @return
212 | */
213 | public static String parseToFmtDateStr(Integer timestamp){
214 | SimpleDateFormat fmt = new SimpleDateFormat(DATE_FORMAT_LONG);
215 | return fmt.format(new Date(timestamp * 1000L));
216 | }
217 |
218 | /**
219 | * Get every date between two dates (inclusive)
220 | * @param beginDay e.g. 2019-05-08
221 | * @param endDay e.g. 2019-05-09
222 | * @return
223 | */
224 | public static List<String> getDays(String beginDay,String endDay){
225 | // the dates to return
226 | List<String> days = new ArrayList<>();
227 | SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
228 | try {
229 | Date start = dateFormat.parse(beginDay);
230 | Date end = dateFormat.parse(endDay);
231 |
232 | Calendar tempStart = Calendar.getInstance();
233 | tempStart.setTime(start);
234 |
235 | Calendar tempEnd = Calendar.getInstance();
236 | tempEnd.setTime(end);
237 | tempEnd.add(Calendar.DATE, +1);
238 | while (tempStart.before(tempEnd)) {
239 | days.add(dateFormat.format(tempStart.getTime()));
240 | tempStart.add(Calendar.DAY_OF_YEAR, 1);
241 | }
242 | } catch (ParseException e) {
243 | e.printStackTrace();
244 | }
245 | return days;
246 | }
247 |
248 | /**
249 | * Convert a Unix timestamp to a formatted date string
250 | * @param timestamp
251 | * @return
252 | */
253 | public static String parseTimestamp2FmtDateStr(String format,Long timestamp){
254 | SimpleDateFormat fmt = new SimpleDateFormat(format);
255 | return fmt.format(new Date(timestamp * 1000L));
256 | }
257 |
258 | /**
259 | * Check whether a time falls within a time-of-day range.
260 | * Hours are given in 24-hour format, e.g. 01 and 03 mean 1 am and 3 am, 15 means 3 pm.
261 | * 04 to 19 means from 4 am to 7 pm today
262 | * 19 to 09 means from 7 pm to 9 am the next day
263 | *
264 | * @param now ...
265 | * @param beginHour ...
266 | * @param endHour ...
267 | * @return ...
268 | */
269 | public static boolean judgeTimeBetween(Date now, String beginHour, String endHour) {
270 | int iBeginHour = Integer.valueOf(beginHour);
271 | int iEndHour = Integer.valueOf(endHour);
272 | if (iBeginHour == iEndHour) {
273 | return true;
274 | }
275 | Calendar date = Calendar.getInstance();
276 | date.set(Calendar.HOUR_OF_DAY, iBeginHour);
277 | date.set(Calendar.MINUTE, 0);
278 | date.set(Calendar.SECOND, 0);
279 | Date beginTime = date.getTime();
280 | if (iEndHour == 0) {
281 | date.set(Calendar.HOUR_OF_DAY, 23);
282 | date.set(Calendar.MINUTE, 59);
283 | date.set(Calendar.SECOND, 59);
284 | Date endTime = date.getTime();
285 | if (now.after(beginTime) && now.before(endTime)) {
286 | return true;
287 | }
288 | }
289 | if (iBeginHour < iEndHour) {
290 | date.set(Calendar.HOUR_OF_DAY, iEndHour);
291 | date.set(Calendar.MINUTE, 0);
292 | date.set(Calendar.SECOND, 0);
293 | Date endTime = date.getTime();
294 | if (now.after(beginTime) && now.before(endTime)) {
295 | return true;
296 | }
297 | } else {
298 | date.setTime(now);
299 | int nowHour = date.get(Calendar.HOUR_OF_DAY);
300 | if (nowHour >= iBeginHour) {
301 | return true;
302 | } else {
303 | if (nowHour < iEndHour) {
304 | return true;
305 | }
306 | }
307 | }
308 | return false;
309 | }
310 |
311 | public static void main(String[] args) {
312 | String nDayFmtDAte = DateUtil.getNDayFmtDAte("yyyy-MM-dd HH:mm:ss", 3);
313 | System.out.println(nDayFmtDAte);
314 | }
315 | }
316 |
--------------------------------------------------------------------------------
/src/main/java/com/xkj/mlrc/clean/util/HdfsUtils.java:
--------------------------------------------------------------------------------
1 | package com.xkj.mlrc.clean.util;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.*;
6 | import org.apache.hadoop.hdfs.DistributedFileSystem;
7 | import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
8 | import org.apache.log4j.Logger;
9 |
10 | import java.io.*;
11 | import java.net.URI;
12 | import java.util.ArrayList;
13 | import java.util.HashMap;
14 | import java.util.List;
15 | import java.util.Map;
16 |
17 | /**
18 | * @Author: lijf@2345.com
19 | * @Date: 2018/7/10 17:35
20 | * @Version: 1.0
21 | */
22 | public class HdfsUtils {
23 |
24 | private HdfsUtils() {
25 | }
26 |
27 | private static FileSystem hdfs;
28 | static Logger logger = Logger.getLogger(HdfsUtils.class);
29 | private static final String PATH_DELIMER = "/";
30 |
31 | static {
32 | //there are many ways to obtain a FileSystem; only one is shown here
33 | Configuration config = new Configuration();
34 | try {
35 | String hdfsUri = PropsUtil.getProp("ad.hdfs.root.uri");
36 | // arguments: first the uri, second the config, third would be the login user
37 | hdfs = FileSystem.get(new URI(hdfsUri), config);
38 |
39 | } catch (Exception e) {
40 | logger.error(e.toString(), e);
41 | }
42 | }
43 |
44 | /**
45 | * Check whether a file or directory exists
46 | *
47 | * @param filename
48 | * @return
49 | */
50 | public static boolean checkFileExist(String filename) {
51 | try {
52 | Path f = new Path(filename);
53 | return hdfs.exists(f);
54 | } catch (Exception e) {
55 | logger.error(e.toString(), e);
56 | }
57 | return false;
58 | }
59 |
60 | /**
61 | * Create a directory
62 | *
63 | * @param dirName
64 | * @return
65 | */
66 | public static boolean mkdir(String dirName) {
67 | if (checkFileExist(dirName)) {
68 | return true;
69 | }
70 | try {
71 | Path f = new Path(dirName);
72 | logger.info("Create and Write :" + f.getName() + " to hdfs");
73 | return hdfs.mkdirs(f);
74 | } catch (Exception e) {
75 | logger.error(e.toString(), e);
76 | }
77 |
78 | return false;
79 | }
80 |
81 | /**
82 | * Create an empty file
83 | *
84 | * @param filePath full path of the file
85 | * @return
86 | */
87 | public static boolean mkfile(String filePath) {
88 | try {
89 | Path f = new Path(filePath);
90 | if (hdfs.exists(f)) {
91 | return true;
92 | }
93 | FSDataOutputStream os = hdfs.create(f, false);
94 | os.close();
95 | return true;
96 | } catch (IllegalArgumentException | IOException e) {
97 | logger.error(e.toString(), e);
98 | }
99 | return false;
100 | }
101 |
102 | /**
103 | * Copy a file to the given destination
104 | *
105 | * @param srcfile srcfile
106 | * @param desfile desfile
107 | * @return boolean
108 | * @throws IOException IOException
109 | */
110 | public static boolean hdfsCopyUtils(String srcfile, String desfile) throws IOException {
111 | Configuration conf = new Configuration();
112 | Path src = new Path(srcfile);
113 | Path dst = new Path(desfile);
114 | FileUtil.copy(src.getFileSystem(conf), src,
115 | dst.getFileSystem(conf), dst, false, conf);
116 |
117 | return true;
118 | }
119 |
120 | /**
121 | * Move a file or directory
122 | *
123 | * @param src source path
124 | * @param dst destination path
125 | * @throws Exception
126 | */
127 | public static void movefile(String src, String dst) throws IOException {
128 | Path p1 = new Path(src);
129 | Path p2 = new Path(dst);
130 | hdfs.rename(p1, p2);
131 | }
132 |
133 | /**
134 | * Delete a file or directory
135 | *
136 | * @param src
137 | * @throws Exception
138 | */
139 | public static void delete(String src) throws IOException {
140 | Path p1 = new Path(src);
141 | if (hdfs.isDirectory(p1)) {
142 | hdfs.delete(p1, true);
143 | logger.info("Deleted directory: " + src);
144 | } else if (hdfs.isFile(p1)) {
145 | hdfs.delete(p1, false);
146 | logger.info("Deleted file: " + src);
147 | }
148 |
149 | }
150 |
151 | /**
152 | * Upload a local file to HDFS; the file is expected to be utf-8
153 | *
154 | * @param localFilename
155 | * @param hdfsPath
156 | * @return
157 | */
158 | public static boolean copyLocalFileToHDFS(String localFilename, String hdfsPath) {
159 | // create the directory if it does not exist
160 | mkdir(hdfsPath);
161 |
162 | File file = new File(localFilename);
163 |
164 | // if the file already exists on hdfs, delete it first
165 | if (HdfsUtils.checkFileExist(hdfsPath + PATH_DELIMER + file.getName())) {
166 | try {
167 | delete(hdfsPath + PATH_DELIMER + file.getName());
168 | } catch (IOException e) {
169 | e.printStackTrace();
170 | }
171 | }
172 |
173 | Path f = new Path(hdfsPath + PATH_DELIMER + file.getName());
174 | try (
175 | FileInputStream is = new FileInputStream(file);
176 | FSDataOutputStream os = hdfs.create(f, true)
177 | ) {
178 | byte[] buffer = new byte[10240000];
179 | int nCount = 0;
180 |
181 | while (true) {
182 | int bytesRead = is.read(buffer);
183 | if (bytesRead <= 0) {
184 | break;
185 | }
186 |
187 | os.write(buffer, 0, bytesRead);
188 | nCount++;
189 | if (nCount % (100) == 0) {
190 | logger.info(" Have move " + nCount + " blocks");
191 | }
192 | }
193 | logger.info(" Write content of file " + file.getName()
194 | + " to hdfs file " + f.getName() + " success");
195 | return true;
196 | } catch (Exception e) {
197 | logger.error(e.toString(), e);
198 | }
199 | return false;
200 | }
201 |
202 | /**
203 | * Copy a local directory to HDFS
204 | *
205 | * @param localPath
206 | * @param hdfsPath
207 | * @return
208 | */
209 | public static boolean copyLocalDirTohdfs(String localPath, String hdfsPath) {
210 | try {
211 | File root = new File(localPath);
212 | File[] files = root.listFiles();
213 |
214 | for (File file : files) {
215 | if (file.isFile()) {
216 | copyLocalFileToHDFS(file.getPath(), hdfsPath);
217 |
218 | } else if (file.isDirectory()) {
219 | copyLocalDirTohdfs(localPath + "/" + file.getName(), hdfsPath + "/" + file.getName());
220 | }
221 | }
222 | return true;
223 | } catch (Exception e) {
224 | logger.error(e.toString(), e);
225 | }
226 | return false;
227 | }
228 |
229 |
230 | /**
231 | * Download a file from HDFS
232 | *
233 | * @param hdfsFilename
234 | * @param localPath
235 | * @return
236 | */
237 | public static boolean downloadFileFromHdfs(String hdfsFilename, String localPath) {
238 |
239 | Path f = new Path(hdfsFilename);
240 | File file = new File(localPath + PATH_DELIMER + f.getName());
241 | try (
242 | FSDataInputStream dis = hdfs.open(f);
243 | FileOutputStream os = new FileOutputStream(file);
244 | ) {
245 | byte[] buffer = new byte[1024000];
246 | int length = 0;
247 | while ((length = dis.read(buffer)) > 0) {
248 | os.write(buffer, 0, length);
249 | }
250 | return true;
251 | } catch (Exception e) {
252 | logger.error(e.toString(), e);
253 | }
254 | return false;
255 | }
256 |
257 | /**
258 | * HDFS-to-HDFS merge
259 | * hdfs provides FileUtil.copyMerge(); note the false below: if it is changed to true, the source directory is deleted
260 | *
261 | * @param folder directory whose files should be merged
262 | * @param file full path of the merged output file
263 | */
264 | public static void copyMerge(String folder, String file) {
265 | Configuration conf = new Configuration();
266 | Path src = new Path(folder);
267 | Path dst = new Path(file);
268 |
269 | try {
270 | FileUtil.copyMerge(src.getFileSystem(conf), src,
271 | dst.getFileSystem(conf), dst, false, conf, null);
272 | } catch (IOException e) {
273 | logger.error(e.toString(), e);
274 | }
275 | }
276 |
277 |
278 | /**
279 | * List the names of all DataNodes
280 | */
281 | public static void listDataNodeInfo() {
282 | try {
283 | DistributedFileSystem fs = null;
284 | fs = (DistributedFileSystem) hdfs;
285 | DatanodeInfo[] dataNodeStats = fs.getDataNodeStats();
286 | String[] names = new String[dataNodeStats.length];
287 | logger.info("List of all the datanode in the HDFS cluster:");
288 | for (int i = 0; i < names.length; i++) {
289 | names[i] = dataNodeStats[i].getHostName();
290 | logger.info(names[i]);
291 | }
292 | logger.info(hdfs.getUri().toString());
293 | } catch (Exception e) {
294 | logger.error(e.toString(), e);
295 | }
296 | }
297 |
298 |
299 | public static boolean mergeDirFiles(List<FileStatus> fileList, String tarPath, String rowTerminateFlag) {
300 |
301 | Path tarFile = new Path(tarPath);
302 | try (FSDataOutputStream tarFileOutputStream = hdfs.create(tarFile, true)) {
303 | byte[] buffer = new byte[1024000];
304 | int length = 0;
305 | long nTotalLength = 0;
306 | int nCount = 0;
307 | boolean bfirst = true;
308 | for (FileStatus file : fileList) {
309 | if (file.getPath().equals(tarFile)) {
310 | continue;
311 | }
312 | logger.info(" merging file from " + file.getPath() + " to " + tarPath);
313 |
314 | if (!bfirst) {
315 | //append the row terminator
316 | tarFileOutputStream.write(rowTerminateFlag.getBytes(), 0, rowTerminateFlag.length());
317 | }
318 | try (
319 | FSDataInputStream srcFileInputStream = hdfs.open(file.getPath(), buffer.length);
320 | ) {
321 | while ((length = srcFileInputStream.read(buffer)) > 0) {
322 | nCount++;
323 | tarFileOutputStream.write(buffer, 0, length);
324 | nTotalLength += length;
325 | if (nCount % 1000 == 0) {
326 | tarFileOutputStream.flush();
327 | logger.info("Have move " + (nTotalLength / 1024000) + " MB");
328 | }
329 |
330 | }
331 | }
332 | bfirst = false;
333 | }
334 |
335 | } catch (Exception e) {
336 | logger.error(e.toString(), e);
337 | try {
338 | delete(tarPath);
339 | } catch (IOException e1) {
340 | e1.printStackTrace();
341 | }
342 | return false;
343 | }
344 | return true;
345 | }
346 |
347 |
348 | /**
349 | * Write a string to a path
350 | *
351 | * @param text string to save
352 | * @param path destination path
353 | */
354 | public static void writerString(String text, String path) {
355 |
356 | try {
357 | Path f = new Path(path);
358 | FSDataOutputStream os = hdfs.append(f);
359 | BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os, "utf-8"));
360 | writer.write(text);
361 | writer.close();
362 | os.close();
363 |
364 | } catch (Exception e) {
365 | logger.error(e.toString(), e);
366 | }
367 |
368 | }
369 |
370 | /**
371 | * Read a file line by line, guarding against encoding issues
372 | *
373 | * @param hdfsFilename
374 | * @return
375 | */
376 | public static List<String> readByLine(String hdfsFilename) {
377 | List<String> list = new ArrayList<>();
378 | Path f = new Path(hdfsFilename);
379 | try (
380 | FSDataInputStream dis = hdfs.open(f);
381 | BufferedReader bf = new BufferedReader(new InputStreamReader(dis));) {
382 | String line = null;
383 | while ((line = bf.readLine()) != null) {
384 | list.add(new String(line.getBytes(), "utf-8"));
385 | }
386 | return list;
387 | } catch (Exception e) {
388 | logger.error(e.toString(), e);
389 | return list;
390 | }
391 | }
392 |
393 | /**
394 | * List the file names under a directory
395 | *
396 | * @param hdfsDir
397 | * @return
398 | */
399 | public static List<String> listFiles(String hdfsDir) {
400 | List<String> listFiles = new ArrayList<>();
401 | try {
402 | Path path = new Path(hdfsDir);
403 | if (!hdfs.exists(path)) {
404 | return listFiles;
405 | }
406 | FileStatus[] fileStatuses = hdfs.listStatus(path);
407 | for (int i = 0; i < fileStatuses.length; i++) {
408 | FileStatus fileStatus = fileStatuses[i];
409 | String fileName = fileStatus.getPath().getName();
410 | listFiles.add(fileName);
411 | }
412 | } catch (Exception e) {
413 | e.printStackTrace();
414 | return listFiles;
415 | }
416 | return listFiles;
417 | }
418 |
419 | /**
420 | * Get the last modification time of a file, or of each child if the path is a directory
421 | *
422 | * @param uri path
423 | * @return
424 | */
425 | public static Map<String, Long> getFilesModifyTime(String uri) {
426 | Map<String, Long> map = new HashMap<>();
427 | try {
428 | if (hdfs.isDirectory(new Path(uri))) {
429 | FileStatus[] fileStatuses = hdfs.listStatus(new Path(uri));
430 | for (int i = 0; i < fileStatuses.length; i++) {
431 | FileStatus fileStatus = fileStatuses[i];
432 | String name = fileStatus.getPath().toUri().toString();
433 | long modificationTime = fileStatus.getModificationTime();
434 | map.put(name, modificationTime);
435 | }
436 | } else {
437 | Path path = new Path(uri);
438 | if (hdfs.exists(path)) {
439 | FileStatus fileStatus = hdfs.getFileStatus(path);
440 | String name = fileStatus.getPath().toUri().toString();
441 | long modificationTime = fileStatus.getModificationTime();
442 | map.put(name, modificationTime);
443 | }
444 | }
445 | } catch (IOException e) {
446 | e.printStackTrace();
447 | return map;
448 | }
449 | return map;
450 | }
451 |
452 |
453 | /**
454 | * Move a path into the HDFS trash
455 | *
456 | * @param src target path
457 | * @return boolean
458 | * @throws IOException IOException
459 | */
460 | public static boolean trashPath(String src) throws IOException {
461 | Path path = new Path(src);
462 | Trash trashTmp = new Trash(hdfs, hdfs.getConf());
463 | if (hdfs.exists(path)) {
464 | if (trashTmp.moveToTrash(path)) {
465 | return true;
466 | }
467 | }
468 | return false;
469 | }
470 |
471 | }
472 |
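A quick usage sketch for the helpers above (the path is hypothetical, and `ad.hdfs.root.uri` must point at your cluster in config.properties for the static initializer to connect):

```java
import com.xkj.mlrc.clean.util.HdfsUtils;

import java.util.List;

public class HdfsUtilsExample {
    public static void main(String[] args) {
        String file = "/tmp/demo/avoid_tables.txt";
        if (HdfsUtils.checkFileExist(file)) {
            // Read a small text file line by line, e.g. a "db.table" exclusion list.
            List<String> lines = HdfsUtils.readByLine(file);
            lines.forEach(System.out::println);
        }
    }
}
```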
--------------------------------------------------------------------------------
/src/main/java/com/xkj/mlrc/clean/util/JdbcHelper.java:
--------------------------------------------------------------------------------
1 | package com.xkj.mlrc.clean.util;
2 |
3 | import com.alibaba.druid.pool.DruidDataSource;
4 | import com.alibaba.druid.pool.DruidDataSourceFactory;
5 | import com.alibaba.druid.pool.DruidPooledConnection;
6 | import org.apache.log4j.Logger;
7 |
8 | import java.io.IOException;
9 | import java.sql.*;
10 | import java.util.List;
11 | import java.util.Properties;
12 |
13 |
14 | /**
15 | * JDBC helper component
16 | *
17 | * @author Administrator
18 | */
19 | public class JdbcHelper {
20 |
21 | static Logger loggrt = Logger.getLogger(JdbcHelper.class);
22 | private static DruidDataSource druidDataSource = null;
23 | private static JdbcHelper instance = null;
24 |
25 |
26 |
27 | /**
28 | * Get the singleton instance
29 | *
30 | * @return singleton instance
31 | */
32 | public static JdbcHelper getHiveInstance() throws Exception {
33 | if (instance == null) {
34 | instance = new JdbcHelper();
35 | Properties properties = new Properties();
36 | String url = PropsUtil.getProp("hive.jdbc.url");
37 | String user = PropsUtil.getProp("hive.jdbc.user");
38 | String password = PropsUtil.getProp("hive.jdbc.password");
39 | String driver = PropsUtil.getProp("hive.jdbc.driver");
40 | properties.put("driverClassName",driver);
41 | properties.put("url",url);
42 | properties.put("username",user);
43 | properties.put("password",password);
44 | druidDataSource = (DruidDataSource) DruidDataSourceFactory.createDataSource(properties);
45 | }
46 | return instance;
47 | }
48 |
49 |
50 | /**
51 | * Return a druid pooled connection
52 | *
53 | * @return
54 | * @throws SQLException
55 | */
56 | public DruidPooledConnection getConnection() throws SQLException {
57 |
58 | return druidDataSource.getConnection();
59 | }
60 |
61 | /**
62 | * Execute an insert/update/delete SQL statement
63 | *
64 | * @param sql
65 | * @param params
66 | * @return number of affected rows
67 | */
68 | public int executeUpdate(String sql, Object[] params) {
69 | int rtn = 0;
70 | Connection conn = null;
71 | PreparedStatement pstmt = null;
72 |
73 | try {
74 | conn = getConnection();
75 | conn.setAutoCommit(false);
76 |
77 | pstmt = conn.prepareStatement(sql);
78 |
79 | if (params != null && params.length > 0) {
80 | for (int i = 0; i < params.length; i++) {
81 | pstmt.setObject(i + 1, params[i]);
82 | }
83 | }
84 |
85 | rtn = pstmt.executeUpdate();
86 |
87 | conn.commit();
88 | } catch (Exception e) {
89 | loggrt.error(e.toString(), e);
90 | } finally {
91 | if (pstmt != null) {
92 | try {
93 | pstmt.close();
94 | } catch (SQLException e) {
95 | loggrt.error(e.toString(), e);
96 | }
97 | }
98 | if (conn != null) {
99 | try {
100 | conn.close();
101 | } catch (SQLException e) {
102 | loggrt.error(e.toString(), e);
103 | }
104 | }
105 | }
106 |
107 | return rtn;
108 | }
109 |
110 | /**
111 | * Execute a query SQL statement
112 | *
113 | * @param sql
114 | * @param params
115 | * @param callback
116 | */
117 | public void executeQuery(String sql, Object[] params,
118 | QueryCallback callback) {
119 | Connection conn = null;
120 | PreparedStatement pstmt = null;
121 | ResultSet rs = null;
122 |
123 | try {
124 | conn = getConnection();
125 | pstmt = conn.prepareStatement(sql);
126 |
127 | if (params != null && params.length > 0) {
128 | for (int i = 0; i < params.length; i++) {
129 | pstmt.setObject(i + 1, params[i]);
130 | }
131 | }
132 |
133 | rs = pstmt.executeQuery();
134 |
135 | callback.process(rs);
136 | } catch (Exception e) {
137 | loggrt.error(e.toString(), e);
138 | } finally {
139 | if (pstmt != null) {
140 | try {
141 | pstmt.close();
142 | } catch (SQLException e) {
143 | loggrt.error(e.toString(), e);
144 | }
145 | }
146 | if (conn != null) {
147 | try {
148 | conn.close();
149 | } catch (SQLException e) {
150 | loggrt.error(e.toString(), e);
151 | }
152 | }
153 | }
154 | }
155 |
156 | /**
157 | * Execute a SQL statement (used for DDL such as drop table / drop partition)
158 | * @param sql
159 |
160 | */
161 | public void execute(String sql) {
162 | Connection conn = null;
163 | Statement pstmt = null;
164 |
165 | try {
166 | conn = getConnection();
167 | pstmt = conn.createStatement();
168 | pstmt.execute(sql);
169 | } catch (Exception e) {
170 | loggrt.error(e.toString(), e);
171 | } finally {
172 | if (pstmt != null) {
173 | try {
174 | pstmt.close();
175 | } catch (SQLException e) {
176 | loggrt.error(e.toString(), e);
177 | }
178 | }
179 | if (conn != null) {
180 | try {
181 | conn.close();
182 | } catch (SQLException e) {
183 | loggrt.error(e.toString(), e);
184 | }
185 | }
186 | }
187 | }
188 |
189 | /**
190 | * @param sql
191 | * @param paramsList
192 | * @return number of rows affected by each statement
193 | */
194 | public int[] executeBatch(String sql, List