").append(m.parseHTML(a)).find(d):a)}).complete(c&&function(a,b){g.each(c,e||[a.responseText,b,a])}),this},m.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(a,b){m.fn[b]=function(a){return this.on(b,a)}}),m.expr.filters.animated=function(a){return m.grep(m.timers,function(b){return a===b.elem}).length};var cd=a.document.documentElement;function dd(a){return m.isWindow(a)?a:9===a.nodeType?a.defaultView||a.parentWindow:!1}m.offset={setOffset:function(a,b,c){var d,e,f,g,h,i,j,k=m.css(a,"position"),l=m(a),n={};"static"===k&&(a.style.position="relative"),h=l.offset(),f=m.css(a,"top"),i=m.css(a,"left"),j=("absolute"===k||"fixed"===k)&&m.inArray("auto",[f,i])>-1,j?(d=l.position(),g=d.top,e=d.left):(g=parseFloat(f)||0,e=parseFloat(i)||0),m.isFunction(b)&&(b=b.call(a,c,h)),null!=b.top&&(n.top=b.top-h.top+g),null!=b.left&&(n.left=b.left-h.left+e),"using"in b?b.using.call(a,n):l.css(n)}},m.fn.extend({offset:function(a){if(arguments.length)return void 0===a?this:this.each(function(b){m.offset.setOffset(this,a,b)});var b,c,d={top:0,left:0},e=this[0],f=e&&e.ownerDocument;if(f)return b=f.documentElement,m.contains(b,e)?(typeof e.getBoundingClientRect!==K&&(d=e.getBoundingClientRect()),c=dd(f),{top:d.top+(c.pageYOffset||b.scrollTop)-(b.clientTop||0),left:d.left+(c.pageXOffset||b.scrollLeft)-(b.clientLeft||0)}):d},position:function(){if(this[0]){var a,b,c={top:0,left:0},d=this[0];return"fixed"===m.css(d,"position")?b=d.getBoundingClientRect():(a=this.offsetParent(),b=this.offset(),m.nodeName(a[0],"html")||(c=a.offset()),c.top+=m.css(a[0],"borderTopWidth",!0),c.left+=m.css(a[0],"borderLeftWidth",!0)),{top:b.top-c.top-m.css(d,"marginTop",!0),left:b.left-c.left-m.css(d,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||cd;while(a&&!m.nodeName(a,"html")&&"static"===m.css(a,"position"))a=a.offsetParent;return a||cd})}}),m.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(a,b){var 
c=/Y/.test(b);m.fn[a]=function(d){return V(this,function(a,d,e){var f=dd(a);return void 0===e?f?b in f?f[b]:f.document.documentElement[d]:a[d]:void(f?f.scrollTo(c?m(f).scrollLeft():e,c?e:m(f).scrollTop()):a[d]=e)},a,d,arguments.length,null)}}),m.each(["top","left"],function(a,b){m.cssHooks[b]=Lb(k.pixelPosition,function(a,c){return c?(c=Jb(a,b),Hb.test(c)?m(a).position()[b]+"px":c):void 0})}),m.each({Height:"height",Width:"width"},function(a,b){m.each({padding:"inner"+a,content:b,"":"outer"+a},function(c,d){m.fn[d]=function(d,e){var f=arguments.length&&(c||"boolean"!=typeof d),g=c||(d===!0||e===!0?"margin":"border");return V(this,function(b,c,d){var e;return m.isWindow(b)?b.document.documentElement["client"+a]:9===b.nodeType?(e=b.documentElement,Math.max(b.body["scroll"+a],e["scroll"+a],b.body["offset"+a],e["offset"+a],e["client"+a])):void 0===d?m.css(b,c,g):m.style(b,c,d,g)},b,f?d:void 0,f,null)}})}),m.fn.size=function(){return this.length},m.fn.andSelf=m.fn.addBack,"function"==typeof define&&define.amd&&define("jquery",[],function(){return m});var ed=a.jQuery,fd=a.$;return m.noConflict=function(b){return a.$===m&&(a.$=fd),b&&a.jQuery===m&&(a.jQuery=ed),m},typeof b===K&&(a.jQuery=a.$=m),m});
5 |
--------------------------------------------------------------------------------
/Aiqiyi_Web/src/main/resources/templates/count.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
Title
9 |
10 |
11 |
12 |
13 |
116 |
117 |
118 |
--------------------------------------------------------------------------------
/Aiqiyi_Web/src/main/resources/templates/test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
Hello Title
8 |
9 |
10 |
11 |
12 |
58 |
59 |
--------------------------------------------------------------------------------
/Aiqiyi_Web/src/test/java/com/example/web/AiqiyiWebApplicationTests.java:
--------------------------------------------------------------------------------
1 | package com.example.web;
2 |
3 | import org.junit.Test;
4 | import org.junit.runner.RunWith;
5 | import org.springframework.boot.test.context.SpringBootTest;
6 | import org.springframework.test.context.junit4.SpringRunner;
7 |
8 | @RunWith(SpringRunner.class) // execute this JUnit 4 test through Spring's SpringRunner
9 | @SpringBootTest // Spring Boot test annotation; presumably boots the application context for the test -- confirm against the app's configuration
10 | public class AiqiyiWebApplicationTests { // smoke-test class for the Aiqiyi web module
11 |
12 | @Test
13 | public void contextLoads() { // empty body: makes no assertions itself, so it only fails if test setup throws
14 | }
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AiqiyiSparkStreamingProject
2 | SparkStreaming爱奇艺实时流统计项目——实战笔记
3 |
4 | ## 一、使用步骤
5 | 1. 将项目克隆到本地
6 | 2. 将Aiqiyi_SparkStreaming和Aiqiyi_Web项目分别导入IDEA
7 | 3. 正确设置Aiqiyi_Data里文件的路径
8 | 4. 阅读代码,运行项目
9 |
10 | ## 二、软件版本
11 | 1. hadoop-2.6.4
12 | 2. zookeeper-3.4.5
13 | 3. kafka_2.12-0.11.0.2
14 | 4. apache-flume-1.6.0-bin
15 | 5. hbase-0.99.2-bin
16 | 6. spark-2.1.0-bin-hadoop2.6
17 |
18 | ## 三、项目需求
19 | 1. 统计爱奇艺每个视频类别的访问量
20 | 2. 统计从搜索引擎引流过来的类别的访问量
21 |
22 | ## 四、项目思路
23 | 1. 编写python脚本模拟产生日志
24 | 2. flume采集日志传送到kafka
25 | 3. StreamingApp主程序从kafka获取日志并进行清洗
26 | 4. 由清洗日志统计每个类别访问量,并保存到hbase数据库
27 | 5. 由清洗日志统计从搜索引擎引流过来的类别的访问量,并保存到hbase数据库
28 | 6. 通过读取hbase数据库的数据,进行数据可视化展示
29 |
30 | ## 五、操作步骤
31 |
32 | ### 1. 启动hadoop
33 | > [hadoop@mini1 hadoop]$ sbin/start-all.sh
34 |
35 | ### 2. 启动zookeeper(三台机器)
36 | > [hadoop@mini1 zookeeper]$ bin/zkServer.sh start
37 |
38 | ### 3. kafka
39 | - 启动kafka(三台机器)
40 | > [hadoop@mini1 kafka]$ bin/kafka-server-start.sh config/server.properties &
41 | - 创建topic
42 | > [hadoop@mini1 kafka]$ bin/kafka-topics.sh \
43 | --create \
44 | --zookeeper mini1:2181 \
45 | --replication-factor 1 \
46 | --partitions 1 \
47 | --topic flumeTopic
48 | - 启动consumer
49 | > [hadoop@mini1 kafka]$ bin/kafka-console-consumer.sh \
50 | --zookeeper mini1:2181 \
51 | --topic flumeTopic \
52 | --from-beginning
53 |
54 | ### 4. flume
55 | - 增加配置文件a1.conf
56 | - 启动flume
57 | > [hadoop@mini1 flume]$ bin/flume-ng agent \
58 | -c conf \
59 | -f conf/a1.conf \
60 | -n a1 \
61 | -Dflume.root.logger=INFO,console
62 |
63 | ### 5. hbase
64 | - 启动hbase
65 | > [hadoop@mini1 hbase]$ bin/start-hbase.sh
66 | - 启动hbase shell
67 | > [hadoop@mini1 hbase]$ bin/hbase shell
68 | - 创建hbase表
69 | > hbase(main):001:0> create 'type','info' \
70 | hbase(main):001:1> create 'search','info'
71 |
72 | ### 6. 运行程序
73 | > 运行StreamingApp程序,准备接收、处理、保存数据
74 |
75 | ### 7. 执行python脚本
76 | > [hadoop@mini1 aiqiyi_logs]$ ./log_generator.sh
77 |
78 | ### 8. 查看hbase表情况
79 | > hbase(main):007:2> scan 'type' \
80 | hbase(main):007:3> scan 'search'
81 |
82 | ## 六、Spark on YARN
83 | ### 1. 打包并上传jar包
84 | > - File → Project Structure → Artifacts → “+” → JAR → From modules with dependencies → Main Class:StreamingApp → OK
85 | > - Build → Build Artifacts → Build
86 | ### 2. 提交作业
87 | > [hadoop@mini1 spark]$ bin/spark-submit \
88 | --master yarn \
89 | --class main.StreamingApp \
90 | /home/hadoop/aiqiyi_logs/Aiqiyi_SparkStreaming.jar
91 | ### 3. 执行python脚本
92 | ### 4. 查看hbase表情况
93 |
94 | ## 七、其他情况
95 | ### 1. 查看进程
96 | [hadoop@mini1 ~]$ jps \
97 | 8458 Main // hbase shell \
98 | 7426 HMaster \
99 | 4325 NameNode \
100 | 4470 SecondaryNameNode \
101 | 2076 QuorumPeerMain \
102 | 2941 Kafka \
103 | 4605 ResourceManager \
104 | 3517 Application // flume \
105 | 7662 Jps \
106 | 3230 ConsoleConsumer
107 |
108 | [hadoop@mini2 ~]$ jps \
109 | 2194 Kafka \
110 | 1556 QuorumPeerMain \
111 | 2582 NodeManager \
112 | 2508 DataNode \
113 | 4334 Jps \
114 | 4063 HRegionServer
115 |
116 | [hadoop@mini3 ~]$ jps \
117 | 2497 DataNode \
118 | 3749 Jps \
119 | 3607 HRegionServer \
120 | 2184 Kafka \
121 | 2588 NodeManager \
122 | 1647 QuorumPeerMain
123 |
124 | ### 2. hbase表结果
125 | 
126 |
127 | ### 3. 相关文件
128 | 
129 |
130 | ## 八、可视化展示
131 | ### 1. 项目运行在本地
132 | - 读取hbase表数据
133 | 
134 | - 浏览器访问
135 | > localhost:8080/count
136 | - 展示效果
137 | 
138 |
139 | ### 2. 项目运行在linux
140 | - 打包并上传jar包
141 | > Maven Projects -> Lifecycle -> package
142 | - 运行作业
143 | > [hadoop@mini1 aiqiyi_logs]$ java -jar aiqiyiweb-0.0.1-SNAPSHOT.jar
144 | - 浏览器访问
145 | > mini1:8080/count
146 |
147 |
148 |
--------------------------------------------------------------------------------