├── README.md ├── adverStat ├── adverStat.iml ├── pom.xml ├── src │ └── main │ │ ├── java │ │ └── scala │ │ │ ├── JdbcHelper.scala │ │ │ └── advertStat.scala │ │ └── resources │ │ └── ad.sql └── target │ └── classes │ ├── META-INF │ └── adverStat.kotlin_module │ └── scala │ ├── AdBlacklistDAO$$anon$1.class │ ├── AdBlacklistDAO$.class │ ├── AdBlacklistDAO.class │ ├── AdClickTrendDAO$$anon$5.class │ ├── AdClickTrendDAO$.class │ ├── AdClickTrendDAO.class │ ├── AdProvinceTop3DAO$.class │ ├── AdProvinceTop3DAO.class │ ├── AdStatDAO$$anon$4.class │ ├── AdStatDAO$.class │ ├── AdStatDAO.class │ ├── AdUserClickCountDAO$$anon$2.class │ ├── AdUserClickCountDAO$$anon$3.class │ ├── AdUserClickCountDAO$.class │ ├── AdUserClickCountDAO.class │ ├── advertStat$$typecreator5$1.class │ ├── advertStat$.class │ ├── advertStat.class │ ├── test$.class │ └── test.class ├── commons ├── commons.iml ├── pom.xml ├── src │ └── main │ │ ├── java │ │ └── commons │ │ │ ├── conf │ │ │ └── ConfigurationManager.scala │ │ │ ├── constant │ │ │ └── Constants.scala │ │ │ ├── model │ │ │ └── DataModel.scala │ │ │ ├── pool │ │ │ └── PooledMySqlClientFactory.scala │ │ │ └── utils │ │ │ └── Utils.scala │ │ └── resources │ │ ├── commerce.properties │ │ └── log4j.properties └── target │ └── classes │ ├── commerce.properties │ ├── commons │ ├── conf │ │ ├── ConfigurationManager$.class │ │ └── ConfigurationManager.class │ ├── constant │ │ ├── Constants$.class │ │ └── Constants.class │ ├── model │ │ ├── AdBlacklist$.class │ │ ├── AdBlacklist.class │ │ ├── AdClickTrend$.class │ │ ├── AdClickTrend.class │ │ ├── AdProvinceTop3$.class │ │ ├── AdProvinceTop3.class │ │ ├── AdStat$.class │ │ ├── AdStat.class │ │ ├── AdUserClickCount$.class │ │ ├── AdUserClickCount.class │ │ ├── ProductInfo$.class │ │ ├── ProductInfo.class │ │ ├── SessionAggrStat$.class │ │ ├── SessionAggrStat.class │ │ ├── SessionDetail$.class │ │ ├── SessionDetail.class │ │ ├── SessionRandomExtract$.class │ │ ├── SessionRandomExtract.class │ │ ├── Top10Category$.class │ │ ├── Top10Category.class │ │ ├── Top10Session$.class │ │ ├── Top10Session.class │ │ ├── UserInfo$.class │ │ ├── UserInfo.class │ │ ├── UserVisitAction$.class │ │ └── UserVisitAction.class │ ├── pool │ │ ├── CreateMySqlPool$.class │ │ ├── CreateMySqlPool.class │ │ ├── MySqlProxy$.class │ │ ├── MySqlProxy.class │ │ ├── PooledMySqlClientFactory$.class │ │ ├── PooledMySqlClientFactory.class │ │ └── QueryCallback.class │ └── utils │ │ ├── DateUtils$.class │ │ ├── DateUtils.class │ │ ├── NumberUtils$.class │ │ ├── NumberUtils.class │ │ ├── ParamUtils$.class │ │ ├── ParamUtils.class │ │ ├── StringUtil$.class │ │ ├── StringUtil.class │ │ ├── ValidUtils$.class │ │ └── ValidUtils.class │ ├── log4j.properties │ └── test │ ├── DataModel.scala │ ├── JdbcHelper.scala │ ├── PageConvertStat.scala │ ├── PageStat.scala │ └── ad.sql ├── mock ├── mock.iml ├── pom.xml ├── src │ └── main │ │ └── java │ │ └── scala │ │ ├── MockDataGenerate.scala │ │ └── MockRealTimeData.scala └── target │ └── classes │ └── scala │ ├── MockDataGenerate$$typecreator13$1.class │ ├── MockDataGenerate$$typecreator21$1.class │ ├── MockDataGenerate$$typecreator5$1.class │ ├── MockDataGenerate$.class │ ├── MockDataGenerate.class │ ├── MockRealTimeData$.class │ └── MockRealTimeData.class ├── pom.xml ├── readme.md └── session ├── pom.xml ├── session.iml ├── src └── main │ └── java │ ├── scala │ ├── sessionAccumulator.scala │ └── sessionStat.scala │ └── server │ ├── SortKey.scala │ ├── serverFive.scala │ ├── serverFour.scala │ ├── serverOne.scala │ ├── 
serverThree.scala │ └── serverTwo.scala └── target └── classes ├── META-INF └── session.kotlin_module ├── scala ├── sessionAccumulator.class ├── sessionStat$.class └── sessionStat.class └── server ├── SortKey$.class ├── SortKey.class ├── serverFive.class ├── serverFour.class ├── serverOne$$typecreator4$1.class ├── serverOne$$typecreator4$2.class ├── serverOne$$typecreator5$1.class ├── serverOne$$typecreator5$2.class ├── serverOne.class ├── serverThree.class └── serverTwo.class /README.md:
--------------------------------------------------------------------------------
1 | >E-commerce Analytics Platform
2 | 
3 | This project is my set of notes from the Atguigu (尚硅谷) big-data e-commerce analytics platform course. It is split into roughly ten requirements, and each requirement is explained in its own article.
4 | 
5 | 
6 | ## Article index:
7 | [Project setup, commons module walkthrough, offline and real-time data preparation](https://blog.csdn.net/zisuu/article/details/106361630)
8 | 
9 | [Requirements overview](https://blog.csdn.net/zisuu/article/details/106302167)
10 | 
11 | 
12 | 
13 | [Requirement 1: session step-length and visit-duration ratio statistics per range](https://blog.csdn.net/zisuu/article/details/106329092)
14 | 
15 | 
16 | [Requirement 2: random session extraction in proportion](https://blog.csdn.net/zisuu/article/details/106333719)
17 | 
18 | [Requirement 3: top 10 popular products](https://blog.csdn.net/zisuu/article/details/106335694)
19 | 
20 | [Requirement 4: top 10 active sessions for the top 10 popular categories](https://blog.csdn.net/zisuu/article/details/106338047)
21 | 
22 | [Requirement 5: single-jump page conversion rate for a given page-visit flow](https://blog.csdn.net/zisuu/article/details/106341485)
23 | 
24 | [Requirement 6: real-time statistics — blacklist mechanism](https://blog.csdn.net/zisuu/article/details/106354769)
25 | 
26 | 
27 | [Prerequisites for requirements 7 and 9](https://blog.csdn.net/zisuu/article/details/106358260)
28 | 
29 | [Requirement 7: real-time ad click counts per province and city](https://blog.csdn.net/zisuu/article/details/106356262)
30 | 
31 | [Requirement 8: real-time top 3 ads per province](https://blog.csdn.net/zisuu/article/details/106357644)
32 | 
33 | 
34 | [Requirement 9: real-time ad click counts for the last hour](https://blog.csdn.net/zisuu/article/details/106359362)
35 | 
36 | [Requirement 10: summary](https://blog.csdn.net/zisuu/article/details/106359657)
37 | 
38 | 
39 | ## Project overview
40 | **Course introduction**
41 | >This course builds a complete, enterprise-grade e-commerce big-data analytics system on top of the Spark ecosystem, which is currently one of the most popular stacks. It includes both an offline analytics system and a real-time analytics system, covering Spark Core, Spark SQL, Spark Streaming and Spark performance tuning, with Spark internals and interview points woven in, so that you master the core frameworks of the Spark ecosystem through hands-on practice.
42 | All requirements come from real enterprise scenarios. For every requirement the instructor combines text and diagrams, implements the code from scratch and explains it line by line, so you understand not only what the code does but why. After this course your understanding of the Spark stack should reach a new level.
43 | 
44 | **How to study?**
45 | 
46 | 
47 | - Download the source code from GitHub (and give it a star!)
48 | Repository: [spark-shopAnalyze](https://github.com/zisuu870/spark-shopAnalyze)
49 | 
50 | - Read the first article in the index to understand what the commons and mock modules do, and create a Maven project by following it — this step really matters.
51 | - Read the second article to get a rough idea of the requirements.
52 | - Work through the articles in order, understand each requirement, and make sure you type the code out yourself.
53 | - After finishing each requirement, summarize what you learned from it.
54 | - When you hit an operator you don't know, look it up yourself.
55 | 
56 | 
57 | 
58 | **Technology stack**
59 | - Spark (Spark SQL, Spark Streaming)
60 | - Hive
61 | - Kafka
62 | - MySQL
63 | - Hadoop HDFS
64 | 
65 | **Required environment**
66 | 
67 | - Hadoop
68 | >I built a fully distributed Hadoop cluster on VirtualBox. If you do not have a Hadoop environment yet, the following two articles may help:
69 | 
70 | [【超详细】最新Windows下安装Virtual Box后安装CentOS7并设置双网卡实现宿主机与虚拟机互相访问](https://blog.csdn.net/adamlinsfz/article/details/84108536)
71 | [【超详细】最新VirtualBox+CentOS7+Hadoop2.8.5手把手搭建完全分布式Hadoop集群(从小白逐步进阶)](https://blog.csdn.net/adamlinsfz/article/details/84333389)
72 | 
73 | - IDEA with a Scala/Spark development setup
74 | 
75 | [如何用Idea运行我们的Spark项目](https://www.cnblogs.com/tjp40922/p/12177913.html)
76 | 
77 | - Spark Streaming and Kafka integration
78 | 
79 | [Spark Streaming整合Kafka](https://www.jianshu.com/p/ec3bf53dcf3f)
80 | 
81 | 
82 | 
83 | 
84 | 
85 | **Main features**
86 | The project is split into an offline statistics part and a real-time statistics part, ten requirements in total, each explained in detail in its own article.
87 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200526170258516.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3ppc3V1,size_16,color_FFFFFF,t_70)
88 | 
89 | **What will you learn?**
90 | 
91 | - How to combine the common big-data frameworks — Hadoop HDFS, Kafka, Spark, Spark SQL, Spark Streaming, Hive — which helps consolidate what you have already learned
92 | - A deeper understanding of Spark operators, Spark SQL and Spark Streaming; Spark is the core framework of this tutorial
93 | - Common big-data computation patterns, and how to analyze a computation requirement, work backwards from it, and apply the patterns flexibly
94 | 
95 | 
96 | ## Module analysis
97 | 
98 | **Project layout:**
99 | 
100 | 
101 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200526171831646.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3ppc3V1,size_16,color_FFFFFF,t_70)
102 | **commons module**
103 | 
104 | >commons mainly provides configuration reading, access to the pooled connection objects, shared code conventions, and so on
105 | 
106 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200526171935541.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3ppc3V1,size_16,color_FFFFFF,t_70)
107 | 
108 | **mock module**
109 | - The mock module generates simulated data.
110 | - MockDataGenerate produces the offline data; you can save it to Hadoop or to Hive. If you have not learned Hive yet, save it to Hadoop.
111 | - MockRealTimeData produces the real-time data and sends it through Kafka to Spark Streaming so the real-time statistics can be computed.
112 | 
113 | 
114 | 
115 | 
116 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200526172158317.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3ppc3V1,size_16,color_FFFFFF,t_70)
117 | **session module**
118 | 
119 | - The session module holds the offline statistics.
120 | - sessionStat contains the main function.
121 | - Each server object under the server directory implements one requirement and is called from the main function in sessionStat.
122 | - sessionAccumulator is a custom accumulator.
123 | - SortKey is a custom sort key.
124 | 
125 | 
126 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200526172431181.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3ppc3V1,size_16,color_FFFFFF,t_70)
127 | 
128 | **adverStat module**
129 | 
130 | - advertStat contains the main function.
131 | - Because the real-time requirements build on one another, they are all invoked from the same main function.
132 | - JdbcHelper can be seen as the DAO layer, as in a Java project.
133 | 
134 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200526172729749.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3ppc3V1,size_16,color_FFFFFF,t_70)
135 | 
136 | 
-------------------------------------------------------------------------------- /adverStat/adverStat.iml:
--------------------------------------------------------------------------------
1 | 2 | 
-------------------------------------------------------------------------------- /adverStat/pom.xml:
-------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | shopAnalyze 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | adverStat 13 | 14 | 15 | 16 | org.apache.hadoop 17 | hadoop-client 18 | 2.8.5 19 | 20 | 21 | com.fasterxml.jackson.core 22 | jackson-core 23 | 2.10.0 24 | 25 | 26 | com.fasterxml.jackson.core 27 | jackson-annotations 28 | 2.10.0 29 | 30 | 31 | 32 | com.fasterxml.jackson.core 33 | jackson-databind 34 | 2.10.0 35 | 36 | 37 | 38 | 39 | com.alibaba 40 | fastjson 41 | 1.2.36 42 | 43 | 44 | 45 | org.apache.hadoop 46 | hadoop-common 47 | 2.8.5 48 | 49 | 50 | org.apache.hadoop 51 | hadoop-hdfs 52 | 2.8.5 53 | 54 | 55 | commons-beanutils 56 | commons-beanutils 57 | 1.9.3 58 | 59 | 60 | org.apache.hadoop 61 | hadoop-yarn-common 62 | 2.8.5 63 | 64 | 65 | org.codehaus.janino 66 | janino 67 | 3.0.8 68 | 69 | 70 | org.apache.spark 71 | spark-sql_2.12 72 | 2.4.5 73 | 74 | 75 | mysql 76 | mysql-connector-java 77 | 8.0.20 78 | 79 | 80 | org.example 81 | commons 82 | ${project.version} 83 | 84 | 85 | 86 | org.apache.spark 87 | spark-core_2.12 88 | 2.4.5 89 | 90 | 91 | 92 | org.apache.spark 93 | spark-hive_2.12 94 | 2.4.5 95 | 96 | 97 | org.apache.spark 98 | spark-streaming_2.12 99 | 2.4.5 100 | 101 | 102 | org.apache.spark 103 | spark-streaming-kafka-0-10_2.12 104 | 2.4.3 105 | 106 | 107 | slf4j-log4j12 108 | org.slf4j 109 | 110 | 111 | 112 | 113 | org.apache.spark 114 | spark-sql_2.12 115 | 2.4.5 116 | 117 | 118 | -------------------------------------------------------------------------------- /adverStat/src/main/java/scala/JdbcHelper.scala: -------------------------------------------------------------------------------- 1 | package scala 2 | 3 | /* 4 | * Copyright (c) 2017. Atguigu Inc. All Rights Reserved. 5 | * Date: 11/1/17 3:40 PM. 6 | * Author: wuyufei. 
7 | */ 8 | 9 | import java.sql.{DriverManager, ResultSet} 10 | 11 | import commons.model.{AdBlacklist, AdClickTrend, AdProvinceTop3, AdStat, AdUserClickCount} 12 | import commons.pool.{CreateMySqlPool, QueryCallback} 13 | 14 | import scala.collection.mutable.ArrayBuffer 15 | 16 | /** 17 | * 用户黑名单DAO类 18 | */ 19 | object AdBlacklistDAO { 20 | 21 | /** 22 | * 批量插入广告黑名单用户 23 | * 24 | * @param adBlacklists 25 | */ 26 | def insertBatch(adBlacklists: Array[AdBlacklist]) { 27 | // 批量插入 28 | val sql = "INSERT INTO ad_blacklist VALUES(?)" 29 | 30 | val paramsList = new ArrayBuffer[Array[Any]]() 31 | 32 | // 向paramsList添加userId 33 | for (adBlacklist <- adBlacklists) { 34 | val params: Array[Any] = Array(adBlacklist.userid) 35 | paramsList += params 36 | } 37 | // 获取对象池单例对象 38 | val mySqlPool = CreateMySqlPool() 39 | // 从对象池中提取对象 40 | val client = mySqlPool.borrowObject() 41 | 42 | // 执行批量插入操作 43 | client.executeBatch(sql, paramsList.toArray) 44 | // 使用完成后将对象返回给对象池 45 | mySqlPool.returnObject(client) 46 | } 47 | 48 | /** 49 | * 查询所有广告黑名单用户 50 | * 51 | * @return 52 | */ 53 | def findAll(): Array[AdBlacklist] = { 54 | // 将黑名单中的所有数据查询出来 55 | val sql = "SELECT * FROM ad_blacklist" 56 | 57 | val adBlacklists = new ArrayBuffer[AdBlacklist]() 58 | 59 | // 获取对象池单例对象 60 | val mySqlPool = CreateMySqlPool() 61 | // 从对象池中提取对象 62 | val client = mySqlPool.borrowObject() 63 | 64 | // 执行sql查询并且通过处理函数将所有的userid加入array中 65 | client.executeQuery(sql, null, new QueryCallback { 66 | override def process(rs: ResultSet): Unit = { 67 | while (rs.next()) { 68 | val userid = rs.getInt(1).toLong 69 | adBlacklists += AdBlacklist(userid) 70 | } 71 | } 72 | }) 73 | 74 | // 使用完成后将对象返回给对象池 75 | mySqlPool.returnObject(client) 76 | adBlacklists.toArray 77 | } 78 | } 79 | 80 | 81 | /** 82 | * 用户广告点击量DAO实现类 83 | * 84 | */ 85 | object AdUserClickCountDAO { 86 | def updateBatch1(adUserClickCounts: Array[AdUserClickCount]): Unit ={ 87 | val mySqlPool=CreateMySqlPool(); 88 | val client=mySqlPool.borrowObject(); 89 | val buffer=new StringBuilder(); 90 | var sql = "INSERT INTO ad_user_click_count VALUES" 91 | buffer.append(sql); 92 | var index=0; 93 | for (index<-0 to adUserClickCounts.size-1){ 94 | val action=adUserClickCounts(index); 95 | var sql1="("+action.date+","+action.userid+","+action.adid+","+action.clickCount+")"; 96 | buffer.append(sql1); 97 | if (index 0) { 124 | updateAdUserClickCounts += adUserClickCount 125 | } else { 126 | insertAdUserClickCounts += adUserClickCount 127 | } 128 | } 129 | }) 130 | } 131 | 132 | // 执行批量插入 133 | val insertSQL = "INSERT INTO ad_user_click_count VALUES(?,?,?,?)" 134 | val insertParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 135 | 136 | // 将待插入项全部加入到参数列表中 137 | for (adUserClickCount <- insertAdUserClickCounts) { 138 | insertParamsList += Array[Any](adUserClickCount.date, adUserClickCount.userid, adUserClickCount.adid, adUserClickCount.clickCount) 139 | } 140 | 141 | // 执行批量插入 142 | client.executeBatch(insertSQL, insertParamsList.toArray) 143 | 144 | // 执行批量更新 145 | // clickCount=clickCount + :此处的UPDATE是进行累加 146 | val updateSQL = "UPDATE ad_user_click_count SET clickCount=clickCount + ? WHERE date=? AND userid=? AND adid=?" 
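    // Added commentary (not part of the original course code): updateBatch splits the incoming
    // rows into ones that must be INSERTed and ones whose clickCount must be accumulated with the
    // UPDATE defined just above. If ad_user_click_count had a UNIQUE KEY on (date, userid, adid) —
    // ad.sql does not define one — the same upsert could be done in a single statement per batch,
    // e.g. (hypothetical alternative):
    //   INSERT INTO ad_user_click_count VALUES(?,?,?,?)
    //   ON DUPLICATE KEY UPDATE clickCount = clickCount + VALUES(clickCount)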
147 | val updateParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 148 | 149 | // 将待更新项全部加入到参数列表中 150 | for (adUserClickCount <- updateAdUserClickCounts) { 151 | updateParamsList += Array[Any](adUserClickCount.clickCount, adUserClickCount.date, adUserClickCount.userid, adUserClickCount.adid) 152 | } 153 | 154 | // 执行批量更新 155 | client.executeBatch(updateSQL, updateParamsList.toArray) 156 | 157 | // 使用完成后将对象返回给对象池 158 | mySqlPool.returnObject(client) 159 | } 160 | 161 | /** 162 | * 根据多个key查询用户广告点击量 163 | * 164 | * @param date 日期 165 | * @param userid 用户id 166 | * @param adid 广告id 167 | * @return 168 | */ 169 | def findClickCountByMultiKey(date: String, userid: Long, adid: Long): Int = { 170 | // 获取对象池单例对象 171 | val mySqlPool = CreateMySqlPool() 172 | // 从对象池中提取对象 173 | val client = mySqlPool.borrowObject() 174 | 175 | val sql = "SELECT clickCount FROM ad_user_click_count " + 176 | "WHERE date=? " + 177 | "AND userid=? " + 178 | "AND adid=?" 179 | 180 | var clickCount = 0 181 | val params = Array[Any](date, userid, adid) 182 | 183 | // 根据多个条件查询指定用户的点击量,将查询结果累加到clickCount中 184 | client.executeQuery(sql, params, new QueryCallback { 185 | override def process(rs: ResultSet): Unit = { 186 | if (rs.next()) { 187 | clickCount = rs.getInt(1) 188 | } 189 | } 190 | }) 191 | // 使用完成后将对象返回给对象池 192 | mySqlPool.returnObject(client) 193 | clickCount 194 | } 195 | } 196 | 197 | 198 | /** 199 | * 广告实时统计DAO实现类 200 | * 201 | * @author Administrator 202 | * 203 | */ 204 | object AdStatDAO { 205 | 206 | def updateBatch(adStats: Array[AdStat]) { 207 | // 获取对象池单例对象 208 | val mySqlPool = CreateMySqlPool() 209 | // 从对象池中提取对象 210 | val client = mySqlPool.borrowObject() 211 | 212 | 213 | // 区分开来哪些是要插入的,哪些是要更新的 214 | val insertAdStats = ArrayBuffer[AdStat]() 215 | val updateAdStats = ArrayBuffer[AdStat]() 216 | 217 | val selectSQL = "SELECT count(*) " + 218 | "FROM ad_stat " + 219 | "WHERE date=? " + 220 | "AND province=? " + 221 | "AND city=? " + 222 | "AND adid=?" 223 | 224 | for (adStat <- adStats) { 225 | 226 | val params = Array[Any](adStat.date, adStat.province, adStat.city, adStat.adid) 227 | // 通过查询结果判断当前项时待插入还是待更新 228 | client.executeQuery(selectSQL, params, new QueryCallback { 229 | override def process(rs: ResultSet): Unit = { 230 | if (rs.next() && rs.getInt(1) > 0) { 231 | updateAdStats += adStat 232 | } else { 233 | insertAdStats += adStat 234 | } 235 | } 236 | }) 237 | } 238 | 239 | // 对于需要插入的数据,执行批量插入操作 240 | val insertSQL = "INSERT INTO ad_stat VALUES(?,?,?,?,?)" 241 | 242 | val insertParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 243 | 244 | for (adStat <- insertAdStats) { 245 | insertParamsList += Array[Any](adStat.date, adStat.province, adStat.city, adStat.adid, adStat.clickCount) 246 | } 247 | 248 | client.executeBatch(insertSQL, insertParamsList.toArray) 249 | 250 | // 对于需要更新的数据,执行批量更新操作 251 | // 此处的UPDATE是进行覆盖 252 | val updateSQL = "UPDATE ad_stat SET clickCount=? " + 253 | "WHERE date=? " + 254 | "AND province=? " + 255 | "AND city=? " + 256 | "AND adid=?" 
257 | 258 | val updateParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 259 | 260 | for (adStat <- updateAdStats) { 261 | updateParamsList += Array[Any](adStat.clickCount, adStat.date, adStat.province, adStat.city, adStat.adid) 262 | } 263 | 264 | client.executeBatch(updateSQL, updateParamsList.toArray) 265 | 266 | // 使用完成后将对象返回给对象池 267 | mySqlPool.returnObject(client) 268 | } 269 | 270 | } 271 | 272 | 273 | /** 274 | * 各省份top3热门广告DAO实现类 275 | * 276 | * @author Administrator 277 | * 278 | */ 279 | object AdProvinceTop3DAO { 280 | 281 | def updateBatch(adProvinceTop3s: Array[AdProvinceTop3]) { 282 | // 获取对象池单例对象 283 | val mySqlPool = CreateMySqlPool() 284 | // 从对象池中提取对象 285 | val client = mySqlPool.borrowObject() 286 | 287 | // dateProvinces可以实现一次去重 288 | // AdProvinceTop3:date province adid clickCount,由于每条数据由date province adid组成 289 | // 当只取date province时,一定会有重复的情况 290 | val dateProvinces = ArrayBuffer[String]() 291 | 292 | for (adProvinceTop3 <- adProvinceTop3s) { 293 | // 组合新key 294 | val key = adProvinceTop3.date + "_" + adProvinceTop3.province 295 | 296 | // dateProvinces中不包含当前key才添加 297 | // 借此去重 298 | if (!dateProvinces.contains(key)) { 299 | dateProvinces += key 300 | } 301 | } 302 | 303 | // 根据去重后的date和province,进行批量删除操作 304 | // 先将原来的数据全部删除 305 | val deleteSQL = "DELETE FROM ad_province_top3 WHERE date=? AND province=?" 306 | 307 | val deleteParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 308 | 309 | for (dateProvince <- dateProvinces) { 310 | 311 | val dateProvinceSplited = dateProvince.split("_") 312 | val date = dateProvinceSplited(0) 313 | val province = dateProvinceSplited(1) 314 | 315 | val params = Array[Any](date, province) 316 | deleteParamsList += params 317 | } 318 | 319 | client.executeBatch(deleteSQL, deleteParamsList.toArray) 320 | 321 | // 批量插入传入进来的所有数据 322 | val insertSQL = "INSERT INTO ad_province_top3 VALUES(?,?,?,?)" 323 | 324 | val insertParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 325 | 326 | // 将传入的数据转化为参数列表 327 | for (adProvinceTop3 <- adProvinceTop3s) { 328 | insertParamsList += Array[Any](adProvinceTop3.date, adProvinceTop3.province, adProvinceTop3.adid, adProvinceTop3.clickCount) 329 | } 330 | 331 | client.executeBatch(insertSQL, insertParamsList.toArray) 332 | 333 | // 使用完成后将对象返回给对象池 334 | mySqlPool.returnObject(client) 335 | } 336 | 337 | } 338 | 339 | 340 | /** 341 | * 广告点击趋势DAO实现类 342 | * 343 | * @author Administrator 344 | * 345 | */ 346 | object AdClickTrendDAO extends Serializable { 347 | 348 | def updateBatch(adClickTrends: Array[AdClickTrend]) { 349 | // 获取对象池单例对象 350 | val mySqlPool = CreateMySqlPool() 351 | // 从对象池中提取对象 352 | val client = mySqlPool.borrowObject() 353 | 354 | // 区分开来哪些是要插入的,哪些是要更新的 355 | val updateAdClickTrends = ArrayBuffer[AdClickTrend]() 356 | val insertAdClickTrends = ArrayBuffer[AdClickTrend]() 357 | 358 | val selectSQL = "SELECT count(*) " + 359 | "FROM ad_click_trend " + 360 | "WHERE date=? " + 361 | "AND hour=? " + 362 | "AND minute=? " + 363 | "AND adid=?" 
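    // Added note (not part of the original course code): the (date, hour, minute, adid) counts passed
    // in here come from reduceByKeyAndWindow in advertStat.getRecentHourClickCount, so each batch
    // already carries the full count for its window. That is why the UPDATE below overwrites
    // clickCount instead of accumulating it the way AdUserClickCountDAO.updateBatch does.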
364 | 365 | for (adClickTrend <- adClickTrends) { 366 | // 通过查询结果判断当前项时待插入还是待更新 367 | val params = Array[Any](adClickTrend.date, adClickTrend.hour, adClickTrend.minute, adClickTrend.adid) 368 | client.executeQuery(selectSQL, params, new QueryCallback { 369 | override def process(rs: ResultSet): Unit = { 370 | if (rs.next() && rs.getInt(1) > 0) { 371 | updateAdClickTrends += adClickTrend 372 | } else { 373 | insertAdClickTrends += adClickTrend 374 | } 375 | } 376 | }) 377 | 378 | } 379 | 380 | // 执行批量更新操作 381 | // 此处的UPDATE是覆盖 382 | val updateSQL = "UPDATE ad_click_trend SET clickCount=? " + 383 | "WHERE date=? " + 384 | "AND hour=? " + 385 | "AND minute=? " + 386 | "AND adid=?" 387 | 388 | val updateParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 389 | 390 | for (adClickTrend <- updateAdClickTrends) { 391 | updateParamsList += Array[Any](adClickTrend.clickCount, adClickTrend.date, adClickTrend.hour, adClickTrend.minute, adClickTrend.adid) 392 | } 393 | 394 | client.executeBatch(updateSQL, updateParamsList.toArray) 395 | 396 | // 执行批量更新操作 397 | val insertSQL = "INSERT INTO ad_click_trend VALUES(?,?,?,?,?)" 398 | 399 | val insertParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 400 | 401 | for (adClickTrend <- insertAdClickTrends) { 402 | insertParamsList += Array[Any](adClickTrend.date, adClickTrend.hour, adClickTrend.minute, adClickTrend.adid, adClickTrend.clickCount) 403 | } 404 | 405 | client.executeBatch(insertSQL, insertParamsList.toArray) 406 | 407 | // 使用完成后将对象返回给对象池 408 | mySqlPool.returnObject(client) 409 | } 410 | 411 | } 412 | 413 | -------------------------------------------------------------------------------- /adverStat/src/main/java/scala/advertStat.scala: -------------------------------------------------------------------------------- 1 | package scala 2 | 3 | import java.util.Date 4 | 5 | import commons.conf.ConfigurationManager 6 | import commons.constant.Constants 7 | import commons.model.{AdBlacklist, AdClickTrend, AdProvinceTop3, AdStat, AdUserClickCount} 8 | import commons.utils.DateUtils 9 | import org.apache.kafka.common.serialization.StringDeserializer 10 | import org.apache.spark.SparkConf 11 | import org.apache.spark.sql.SparkSession 12 | import org.apache.spark.sql.catalyst.expressions.{Hour, Minute} 13 | import org.apache.spark.streaming.dstream.DStream 14 | import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies} 15 | import org.apache.spark.streaming.{Duration, Minutes, Seconds, StreamingContext} 16 | 17 | import scala.collection.mutable.{ArrayBuffer, ListBuffer} 18 | 19 | object advertStat { 20 | 21 | 22 | def main(args: Array[String]): Unit = { 23 | val sparkConf = new SparkConf().setAppName("adver").setMaster("local[*]").set("spark.serializer","org.apache.spark.serializer.KryoSerializer"); 24 | val sparkSession = SparkSession.builder().config(sparkConf).getOrCreate(); 25 | sparkSession.sparkContext.setLogLevel("ERROR"); 26 | 27 | // val streamingContext = StreamingContext.getActiveOrCreate(checkpointDir, func) 28 | val ssc = new StreamingContext(sparkSession.sparkContext, Seconds(5)) 29 | 30 | val kafka_brokers = ConfigurationManager.config.getString("kafka.broker.list") 31 | val kafka_topics = ConfigurationManager.config.getString(Constants.KAFKA_TOPICS) 32 | 33 | val kafkaParam = Map( 34 | "bootstrap.servers" -> kafka_brokers, 35 | "key.deserializer" -> classOf[StringDeserializer], 36 | "value.deserializer" -> classOf[StringDeserializer], 37 | "group.id" -> "0", 38 | // auto.offset.reset 
39 | // latest: 先去Zookeeper获取offset,如果有,直接使用,如果没有,从最新的数据开始消费; 40 | // earlist: 先去Zookeeper获取offset,如果有,直接使用,如果没有,从最开始的数据开始消费 41 | // none: 先去Zookeeper获取offset,如果有,直接使用,如果没有,直接报错 42 | "auto.offset.reset" -> "latest", 43 | "enable.auto.commit" -> (false:java.lang.Boolean) 44 | ) 45 | 46 | // adRealTimeDStream: DStream[RDD RDD RDD ...] RDD[message] message: key value 47 | val adRealTimeDStream = KafkaUtils.createDirectStream[String, String](ssc, 48 | LocationStrategies.PreferConsistent, 49 | ConsumerStrategies.Subscribe[String, String](Array(kafka_topics), kafkaParam) 50 | ) 51 | val adReadTimeValueDStream=adRealTimeDStream.map(item=>item.value); 52 | val adRealTimeFilterDstream=adReadTimeValueDStream.transform{ 53 | RDDS=>{ 54 | val blackList=AdBlacklistDAO.findAll(); 55 | val black=blackList.map(item=>item.userid); 56 | RDDS.filter{ 57 | log=>{ 58 | val userId=log.split(" ")(3).toLong; 59 | !black.contains(userId); 60 | } 61 | } 62 | } 63 | 64 | } 65 | 66 | ssc.checkpoint("hdfs://hadoop1:9000/sparkStreaming") 67 | adRealTimeFilterDstream.checkpoint(Duration(10000)) 68 | 69 | 70 | 71 | /* 72 | 需求一------实时维护黑名单 73 | */ 74 | 75 | //generateBlackList(adRealTimeFilterDstream); 76 | 77 | /* 78 | 需求二------实时统计各省各区域的广告点击量 79 | */ 80 | //val key2ProvinceCityCountDStream=provinceCityClickStat(adRealTimeFilterDstream) 81 | 82 | /* 83 | 需求三_-------------top3广告 84 | */ 85 | // proveinceTope3Adver(sparkSession,key2ProvinceCityCountDStream) 86 | 87 | 88 | /* 89 | 需求四-------------实时统计近一个小时的广告点击量 90 | */ 91 | getRecentHourClickCount(adRealTimeFilterDstream) 92 | 93 | ssc.start(); 94 | ssc.awaitTermination(); 95 | 96 | } 97 | def getRecentHourClickCount(adRealTimeFilterDstream: DStream[String]) = { 98 | //1.转化key为dateTime_adid 99 | val key2TimeMinute=adRealTimeFilterDstream.map{ 100 | case(log)=>{ 101 | val logSplit = log.split(" ") 102 | val timeStamp = logSplit(0).toLong 103 | // yyyyMMddHHmm 104 | val timeMinute = DateUtils.formatTimeMinute(new Date(timeStamp)) 105 | val adid = logSplit(4).toLong 106 | 107 | val key = timeMinute + "_" + adid 108 | 109 | (key, 1L) 110 | } 111 | } 112 | //2.window operation 统计 113 | val windowKey2=key2TimeMinute.reduceByKeyAndWindow((a:Long, b:Long)=>(a+b), Seconds(10), Seconds(5)); 114 | //3.封装入库 115 | windowKey2.foreachRDD{ 116 | rdd => rdd.foreachPartition{ 117 | // (key, count) 118 | items=> 119 | val trendArray = new ArrayBuffer[AdClickTrend]() 120 | for((key, count) <- items){ 121 | val keySplit = key.split("_") 122 | // yyyyMMddHHmm 123 | val timeMinute = keySplit(0) 124 | val date = timeMinute.substring(0, 8) 125 | val hour = timeMinute.substring(8,10) 126 | val minute = timeMinute.substring(10) 127 | val adid = keySplit(1).toLong 128 | 129 | trendArray += AdClickTrend(date, hour, minute, adid, count) 130 | } 131 | trendArray.foreach(println); 132 | //AdClickTrendDAO.updateBatch(trendArray.toArray) 133 | } 134 | } 135 | } 136 | def proveinceTope3Adver(sparkSession: SparkSession, 137 | key2ProvinceCityCountDStream: DStream[(String, Long)])={ 138 | //1.转化key为date_province_adid,value仍然是原本的count 139 | val key2ProvinceCountDStream=key2ProvinceCityCountDStream.map{ 140 | case (key,count)=>{ 141 | val keySplit = key.split("_") 142 | val date = keySplit(0) 143 | val province = keySplit(1) 144 | val adid = keySplit(3) 145 | (date+"_"+province+"_"+adid,count); 146 | } 147 | } 148 | //2.累增,创建临时表 149 | val key2ProvinceAggCountDStream=key2ProvinceCountDStream.reduceByKey(_+_); 150 | val top3DStream=key2ProvinceAggCountDStream.transform{ 151 | stream=>{ 152 | val temp=stream.map{ 
153 | case (key,count)=>{ 154 | val keySplit = key.split("_") 155 | val date = keySplit(0) 156 | val province = keySplit(1) 157 | val adid = keySplit(2).toLong 158 | 159 | (date, province, adid, count) 160 | } 161 | } 162 | import sparkSession.implicits._; 163 | temp.toDF("date","province","adid","count").createOrReplaceTempView("tmp_basic_info"); 164 | 165 | val sql = "select date, province, adid, count from(" + 166 | "select date, province, adid, count, " + 167 | "row_number() over(partition by date,province order by count desc) rank from tmp_basic_info) " + 168 | "where rank <= 3" 169 | sparkSession.sql(sql).rdd; 170 | } 171 | } 172 | //3.数据封装 173 | top3DStream.foreachRDD{ 174 | // rdd : RDD[row] 175 | rdd => 176 | rdd.foreachPartition{ 177 | // items : row 178 | items => 179 | val top3Array = new ArrayBuffer[AdProvinceTop3]() 180 | for(item <- items){ 181 | val date = item.getAs[String]("date") 182 | val province = item.getAs[String]("province") 183 | val adid = item.getAs[Long]("adid") 184 | val count = item.getAs[Long]("count") 185 | 186 | top3Array += AdProvinceTop3(date, province, adid, count) 187 | } 188 | //top3Array.foreach(println); 189 | //AdProvinceTop3DAO.updateBatch(top3Array.toArray) 190 | } 191 | } 192 | 193 | } 194 | def provinceCityClickStat(adRealTimeFilterDStream: DStream[String])={ 195 | val key2ProvinceCityDStream = adRealTimeFilterDStream.map{ 196 | case log => 197 | val logSplit = log.split(" ") 198 | val timeStamp = logSplit(0).toLong 199 | // dateKey : yy-mm-dd 200 | val dateKey = DateUtils.formatDateKey(new Date(timeStamp)) 201 | val province = logSplit(1) 202 | val city = logSplit(2) 203 | val adid = logSplit(4) 204 | 205 | val key = dateKey + "_" + province + "_" + city + "_" + adid 206 | (key, 1L) 207 | } 208 | 209 | //使用updateStateByKey算子,维护数据的更新 210 | val key2StateDStream = key2ProvinceCityDStream.updateStateByKey[Long]{ 211 | (values:Seq[Long], state:Option[Long])=>{ 212 | var newValues=state.getOrElse(0L); 213 | for(v<-values)newValues+=v; 214 | Some(newValues); 215 | } 216 | } 217 | key2StateDStream.foreachRDD{ 218 | rdd => rdd.foreachPartition{ 219 | items => 220 | val adStatArray = new ArrayBuffer[AdStat]() 221 | // key: date province city adid 222 | for((key, count) <- items){ 223 | val keySplit = key.split("_") 224 | val date = keySplit(0) 225 | val province = keySplit(1) 226 | val city = keySplit(2) 227 | val adid = keySplit(3).toLong 228 | 229 | adStatArray += AdStat(date, province, city, adid, count) 230 | } 231 | // AdStatDAO.updateBatch(adStatArray.toArray) 232 | //adStatArray.foreach(println); 233 | } 234 | } 235 | key2StateDStream 236 | } 237 | def generateBlackList(adRealTimeFilterDstream: DStream[String])= { 238 | val key2NumDStream=adRealTimeFilterDstream.map { 239 | case (log)=>{ 240 | val logSplit = log.split(" ") 241 | val timeStamp = logSplit(0).toLong 242 | // yy-mm-dd 243 | val dateKey = DateUtils.formatDateKey(new Date(timeStamp)) 244 | val userId = logSplit(3).toLong 245 | val adid = logSplit(4).toLong 246 | 247 | val key = dateKey + "_" + userId + "_" + adid 248 | 249 | (key, 1L) 250 | } 251 | } 252 | key2NumDStream 253 | //1.先统计每个用户的点击次数 254 | val keyCountStream=key2NumDStream.reduceByKey(_+_); 255 | var flag=0; 256 | //2.更新数据库 257 | keyCountStream.foreachRDD{ 258 | RDDS=>RDDS.foreachPartition{ 259 | part=>{ 260 | val clickCountArray=new ArrayBuffer[AdUserClickCount](); 261 | for((k,v)<-part){ 262 | val keySplit = k.split("_") 263 | val date = keySplit(0) 264 | val userId = keySplit(1).toLong 265 | val adid = keySplit(2).toLong 
266 | 267 | clickCountArray += AdUserClickCount(date, userId, adid, v) 268 | } 269 | if (clickCountArray.size>0){ 270 | flag=1; 271 | AdUserClickCountDAO.updateBatch1(clickCountArray.toArray); 272 | } 273 | } 274 | } 275 | } 276 | if (flag==1){ 277 | //3.对keyCountStream中的每个rdd,通过查询数据库,获取点击次数,从而进行过滤操作 278 | val filterKeyCountStream=keyCountStream.filter { 279 | case (key,count)=>{ 280 | val keySplit = key.split("_") 281 | val date = keySplit(0) 282 | val userId = keySplit(1).toLong 283 | val adid = keySplit(2).toLong 284 | 285 | val clickCount = AdUserClickCountDAO.findClickCountByMultiKey(date, userId, adid) 286 | 287 | if(clickCount > 10){ 288 | println("userID:"+userId+"is die"); 289 | true 290 | }else{ 291 | false 292 | } 293 | } 294 | } 295 | //4.将剩下的数据加入黑名单中 296 | val filterBlackListDstream=filterKeyCountStream.map{ 297 | case (key,count)=>{ 298 | key.split("_")(1).toLong 299 | } 300 | }.transform(rdds=>rdds.distinct()); 301 | filterBlackListDstream.foreachRDD{ 302 | rdds=>rdds.foreachPartition{ 303 | part=>{ 304 | val buffer=new ListBuffer[AdBlacklist]; 305 | for(userId<-part){ 306 | buffer+=AdBlacklist(userId); 307 | } 308 | AdBlacklistDAO.insertBatch(buffer.toArray) 309 | 310 | } 311 | } 312 | } 313 | } 314 | 315 | } 316 | 317 | 318 | } 319 | -------------------------------------------------------------------------------- /adverStat/src/main/resources/ad.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat Premium Data Transfer 3 | 4 | Source Server : localhost 5 | Source Server Type : MySQL 6 | Source Server Version : 50720 7 | Source Host : localhost 8 | Source Database : commerce 9 | 10 | Target Server Type : MySQL 11 | Target Server Version : 50720 12 | File Encoding : utf-8 13 | 14 | Date: 11/03/2017 11:23:32 AM 15 | */ 16 | 17 | SET FOREIGN_KEY_CHECKS = 0; 18 | 19 | -- ---------------------------- 20 | -- Table structure for `ad_blacklist` 21 | -- ---------------------------- 22 | DROP TABLE IF EXISTS `ad_blacklist`; 23 | CREATE TABLE `ad_blacklist` ( 24 | `userid` int(11) DEFAULT NULL 25 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 26 | 27 | -- ---------------------------- 28 | -- Table structure for `ad_click_trend` 29 | -- ---------------------------- 30 | DROP TABLE IF EXISTS `ad_click_trend`; 31 | CREATE TABLE `ad_click_trend` ( 32 | `date` varchar(30) DEFAULT NULL, 33 | `hour` varchar(30) DEFAULT NULL, 34 | `minute` varchar(30) DEFAULT NULL, 35 | `adid` int(11) DEFAULT NULL, 36 | `clickCount` int(11) DEFAULT NULL 37 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 38 | 39 | -- ---------------------------- 40 | -- Table structure for `ad_province_top3` 41 | -- ---------------------------- 42 | DROP TABLE IF EXISTS `ad_province_top3`; 43 | CREATE TABLE `ad_province_top3` ( 44 | `date` varchar(30) DEFAULT NULL, 45 | `province` varchar(100) DEFAULT NULL, 46 | `adid` int(11) DEFAULT NULL, 47 | `clickCount` int(11) DEFAULT NULL 48 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 49 | 50 | -- ---------------------------- 51 | -- Table structure for `ad_stat` 52 | -- ---------------------------- 53 | DROP TABLE IF EXISTS `ad_stat`; 54 | CREATE TABLE `ad_stat` ( 55 | `date` varchar(30) DEFAULT NULL, 56 | `province` varchar(100) DEFAULT NULL, 57 | `city` varchar(100) DEFAULT NULL, 58 | `adid` int(11) DEFAULT NULL, 59 | `clickCount` int(11) DEFAULT NULL 60 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 61 | 62 | -- ---------------------------- 63 | -- Table structure for `ad_user_click_count` 64 | -- ---------------------------- 65 | DROP TABLE IF EXISTS 
`ad_user_click_count`; 66 | CREATE TABLE `ad_user_click_count` ( 67 | `date` varchar(30) DEFAULT NULL, 68 | `userid` int(11) DEFAULT NULL, 69 | `adid` int(11) DEFAULT NULL, 70 | `clickCount` int(11) DEFAULT NULL 71 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 72 | 73 | -------------------------------------------------------------------------------- /adverStat/target/classes/META-INF/adverStat.kotlin_module: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdBlacklistDAO$$anon$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdBlacklistDAO$$anon$1.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdBlacklistDAO$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdBlacklistDAO$.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdBlacklistDAO.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdBlacklistDAO.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdClickTrendDAO$$anon$5.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdClickTrendDAO$$anon$5.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdClickTrendDAO$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdClickTrendDAO$.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdClickTrendDAO.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdClickTrendDAO.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdProvinceTop3DAO$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdProvinceTop3DAO$.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdProvinceTop3DAO.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdProvinceTop3DAO.class 
-------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdStatDAO$$anon$4.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdStatDAO$$anon$4.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdStatDAO$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdStatDAO$.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdStatDAO.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdStatDAO.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdUserClickCountDAO$$anon$2.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdUserClickCountDAO$$anon$2.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdUserClickCountDAO$$anon$3.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdUserClickCountDAO$$anon$3.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdUserClickCountDAO$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdUserClickCountDAO$.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/AdUserClickCountDAO.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/AdUserClickCountDAO.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/advertStat$$typecreator5$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/advertStat$$typecreator5$1.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/advertStat$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/advertStat$.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/advertStat.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/advertStat.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/test$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/test$.class -------------------------------------------------------------------------------- /adverStat/target/classes/scala/test.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/adverStat/target/classes/scala/test.class -------------------------------------------------------------------------------- /commons/commons.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /commons/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | shopAnalyze 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | commons 13 | 14 | 15 | 16 | org.apache.spark 17 | spark-core_2.12 18 | 2.4.5 19 | 20 | 21 | 22 | 23 | org.apache.commons 24 | commons-configuration2 25 | 2.5 26 | 27 | 28 | 29 | 30 | 31 | 32 | commons-beanutils 33 | commons-beanutils 34 | 1.9.3 35 | 36 | 37 | commons-beanutils 38 | commons-beanutils-core 39 | 1.8.3 40 | 41 | 42 | 43 | 44 | 45 | org.apache.commons 46 | commons-pool2 47 | 2.5.0 48 | 49 | 50 | 51 | org.apache.spark 52 | spark-sql_2.12 53 | 2.4.5 54 | 55 | 56 | org.apache.spark 57 | spark-streaming_2.12 58 | 2.4.5 59 | 60 | 61 | 62 | mysql 63 | mysql-connector-java 64 | 8.0.20 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /commons/src/main/java/commons/conf/ConfigurationManager.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018. Atguigu Inc. All Rights Reserved. 
3 | */ 4 | 5 | package commons.conf 6 | 7 | import org.apache.commons.configuration2.{FileBasedConfiguration, PropertiesConfiguration} 8 | import org.apache.commons.configuration2.builder.FileBasedConfigurationBuilder 9 | import org.apache.commons.configuration2.builder.fluent.Parameters 10 | 11 | /** 12 | * 配置工具类,基于文件的配置生成器,会读取resources下的commerce.properties,并返回所有配置信息 13 | * 14 | */ 15 | object ConfigurationManager { 16 | 17 | // 创建用于初始化配置生成器实例的参数对象 18 | private val params = new Parameters() 19 | // FileBasedConfigurationBuilder:产生一个传入的类的实例对象 20 | // FileBasedConfiguration:融合FileBased与Configuration的接口 21 | // PropertiesConfiguration:从一个或者多个文件读取配置的标准配置加载器 22 | // configure():通过params实例初始化配置生成器 23 | // 向FileBasedConfigurationBuilder()中传入一个标准配置加载器类,生成一个加载器类的实例对象,然后通过params参数对其初始化 24 | private val builder = new FileBasedConfigurationBuilder[FileBasedConfiguration](classOf[PropertiesConfiguration]) 25 | .configure(params.properties().setFileName("commerce.properties")) 26 | 27 | // 通过getConfiguration获取配置对象 28 | val config = builder.getConfiguration() 29 | 30 | } 31 | -------------------------------------------------------------------------------- /commons/src/main/java/commons/constant/Constants.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018. Atguigu Inc. All Rights Reserved. 3 | */ 4 | 5 | package commons.constant 6 | 7 | /** 8 | * 常量接口 9 | */ 10 | object Constants { 11 | 12 | /** 13 | * 项目配置相关的常量 14 | */ 15 | val JDBC_DATASOURCE_SIZE = "jdbc.datasource.size" 16 | val JDBC_URL = "jdbc.url" 17 | val JDBC_USER = "jdbc.user" 18 | val JDBC_PASSWORD = "jdbc.password" 19 | 20 | val KAFKA_TOPICS = "kafka.topics" 21 | 22 | /** 23 | * Spark作业相关的常量 24 | */ 25 | val SPARK_APP_NAME_SESSION = "UserVisitSessionAnalyzeSpark" 26 | val SPARK_APP_NAME_PAGE = "PageOneStepConvertRateSpark" 27 | 28 | /** 29 | * user_visit_action、user_info、product_info表中字段对应的字段名常量 30 | */ 31 | val FIELD_SESSION_ID = "sessionid" 32 | val FIELD_SEARCH_KEYWORDS = "searchKeywords" 33 | val FIELD_CLICK_CATEGORY_IDS = "clickCategoryIds" 34 | val FIELD_AGE = "age" 35 | val FIELD_PROFESSIONAL = "professional" 36 | val FIELD_CITY = "city" 37 | val FIELD_SEX = "sex" 38 | val FIELD_VISIT_LENGTH = "visitLength" 39 | val FIELD_STEP_LENGTH = "stepLength" 40 | val FIELD_START_TIME = "startTime" 41 | val FIELD_CLICK_COUNT = "clickCount" 42 | val FIELD_ORDER_COUNT = "orderCount" 43 | val FIELD_PAY_COUNT = "payCount" 44 | val FIELD_CATEGORY_ID = "categoryid" 45 | 46 | /** 47 | * Spark累加器Key名称常量 48 | */ 49 | val SESSION_COUNT = "session_count" 50 | 51 | val TIME_PERIOD_1s_3s = "1s_3s" 52 | val TIME_PERIOD_4s_6s = "4s_6s" 53 | val TIME_PERIOD_7s_9s = "7s_9s" 54 | val TIME_PERIOD_10s_30s = "10s_30s" 55 | val TIME_PERIOD_30s_60s = "30s_60s" 56 | val TIME_PERIOD_1m_3m = "1m_3m" 57 | val TIME_PERIOD_3m_10m = "3m_10m" 58 | val TIME_PERIOD_10m_30m = "10m_30m" 59 | val TIME_PERIOD_30m = "30m" 60 | 61 | val STEP_PERIOD_1_3 = "1_3" 62 | val STEP_PERIOD_4_6 = "4_6" 63 | val STEP_PERIOD_7_9 = "7_9" 64 | val STEP_PERIOD_10_30 = "10_30" 65 | val STEP_PERIOD_30_60 = "30_60" 66 | val STEP_PERIOD_60 = "60" 67 | 68 | /** 69 | * task.params.json中限制条件对应的常量字段 70 | */ 71 | val TASK_PARAMS = "task.params.json" 72 | val PARAM_START_DATE = "startDate" 73 | val PARAM_END_DATE = "endDate" 74 | val PARAM_START_AGE = "startAge" 75 | val PARAM_END_AGE = "endAge" 76 | val PARAM_PROFESSIONALS = "professionals" 77 | val PARAM_CITIES = "cities" 78 | val PARAM_SEX = "sex" 79 | val PARAM_KEYWORDS = "keywords" 80 | 
val PARAM_CATEGORY_IDS = "categoryIds" 81 | val PARAM_TARGET_PAGE_FLOW = "targetPageFlow" 82 | 83 | } 84 | -------------------------------------------------------------------------------- /commons/src/main/java/commons/model/DataModel.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018. Atguigu Inc. All Rights Reserved. 3 | */ 4 | 5 | package commons.model 6 | /** 7 | * 广告黑名单 8 | * 9 | */ 10 | case class AdBlacklist(userid:Long) 11 | 12 | /** 13 | * 用户广告点击量 14 | * @author wuyufei 15 | * 16 | */ 17 | case class AdUserClickCount(date:String, 18 | userid:Long, 19 | adid:Long, 20 | clickCount:Long) 21 | 22 | 23 | /** 24 | * 广告实时统计 25 | * 26 | */ 27 | case class AdStat(date:String, 28 | province:String, 29 | city:String, 30 | adid:Long, 31 | clickCount:Long) 32 | 33 | /** 34 | * 各省top3热门广告 35 | * 36 | */ 37 | case class AdProvinceTop3(date:String, 38 | province:String, 39 | adid:Long, 40 | clickCount:Long) 41 | 42 | /** 43 | * 广告点击趋势 44 | * 45 | */ 46 | case class AdClickTrend(date:String, 47 | hour:String, 48 | minute:String, 49 | adid:Long, 50 | clickCount:Long) 51 | 52 | //***************** 输入表 ********************* 53 | 54 | /**对象池的配置,当对于数据库连接池,用于避免对象创建过程中的损耗 55 | * 用户访问动作表 56 | * 57 | * @param date 用户点击行为的日期 58 | * @param user_id 用户的ID 59 | * @param session_id Session的ID 60 | * @param page_id 某个页面的ID 61 | * @param action_time 点击行为的时间点 62 | * @param search_keyword 用户搜索的关键词 63 | * @param click_category_id 某一个商品品类的ID 64 | * @param click_product_id 某一个商品的ID 65 | * @param order_category_ids 一次订单中所有品类的ID集合 66 | * @param order_product_ids 一次订单中所有商品的ID集合 67 | * @param pay_category_ids 一次支付中所有品类的ID集合 68 | * @param pay_product_ids 一次支付中所有商品的ID集合 69 | * @param city_id 城市ID 70 | */ 71 | case class UserVisitAction(date: String, 72 | user_id: Long, 73 | session_id: String, 74 | page_id: Long, 75 | action_time: String, 76 | search_keyword: String, 77 | click_category_id: Long, 78 | click_product_id: Long, 79 | order_category_ids: String, 80 | order_product_ids: String, 81 | pay_category_ids: String, 82 | pay_product_ids: String, 83 | city_id: Long 84 | ) 85 | 86 | /** 87 | * 用户信息表 88 | * 89 | * @param user_id 用户的ID 90 | * @param username 用户的名称 91 | * @param name 用户的名字 92 | * @param age 用户的年龄 93 | * @param professional 用户的职业 94 | * @param city 用户所在的城市 95 | * @param sex 用户的性别 96 | */ 97 | case class UserInfo(user_id: Long, 98 | username: String, 99 | name: String, 100 | age: Int, 101 | professional: String, 102 | city: String, 103 | sex: String 104 | ) 105 | 106 | /** 107 | * 产品表 108 | * 109 | * @param product_id 商品的ID 110 | * @param product_name 商品的名称 111 | * @param extend_info 商品额外的信息 112 | */ 113 | case class ProductInfo(product_id: Long, 114 | product_name: String, 115 | extend_info: String 116 | ) 117 | /* 118 | * Copyright (c) 2018. Atguigu Inc. All Rights Reserved. 
119 | */ 120 | 121 | //***************** 输出表 ********************* 122 | 123 | /** 124 | * 聚合统计表 125 | * 126 | * @param taskid 当前计算批次的ID 127 | * @param session_count 所有Session的总和 128 | * @param visit_length_1s_3s_ratio 1-3sSession访问时长占比 129 | * @param visit_length_4s_6s_ratio 4-6sSession访问时长占比 130 | * @param visit_length_7s_9s_ratio 7-9sSession访问时长占比 131 | * @param visit_length_10s_30s_ratio 10-30sSession访问时长占比 132 | * @param visit_length_30s_60s_ratio 30-60sSession访问时长占比 133 | * @param visit_length_1m_3m_ratio 1-3mSession访问时长占比 134 | * @param visit_length_3m_10m_ratio 3-10mSession访问时长占比 135 | * @param visit_length_10m_30m_ratio 10-30mSession访问时长占比 136 | * @param visit_length_30m_ratio 30mSession访问时长占比 137 | * @param step_length_1_3_ratio 1-3步长占比 138 | * @param step_length_4_6_ratio 4-6步长占比 139 | * @param step_length_7_9_ratio 7-9步长占比 140 | * @param step_length_10_30_ratio 10-30步长占比 141 | * @param step_length_30_60_ratio 30-60步长占比 142 | * @param step_length_60_ratio 大于60步长占比 143 | */ 144 | case class SessionAggrStat(taskid: String, 145 | session_count: Long, 146 | visit_length_1s_3s_ratio: Double, 147 | visit_length_4s_6s_ratio: Double, 148 | visit_length_7s_9s_ratio: Double, 149 | visit_length_10s_30s_ratio: Double, 150 | visit_length_30s_60s_ratio: Double, 151 | visit_length_1m_3m_ratio: Double, 152 | visit_length_3m_10m_ratio: Double, 153 | visit_length_10m_30m_ratio: Double, 154 | visit_length_30m_ratio: Double, 155 | step_length_1_3_ratio: Double, 156 | step_length_4_6_ratio: Double, 157 | step_length_7_9_ratio: Double, 158 | step_length_10_30_ratio: Double, 159 | step_length_30_60_ratio: Double, 160 | step_length_60_ratio: Double 161 | ) 162 | 163 | /** 164 | * Session随机抽取表 165 | * 166 | * @param taskid 当前计算批次的ID 167 | * @param sessionid 抽取的Session的ID 168 | * @param startTime Session的开始时间 169 | * @param searchKeywords Session的查询字段 170 | * @param clickCategoryIds Session点击的类别id集合 171 | */ 172 | case class SessionRandomExtract(taskid:String, 173 | sessionid:String, 174 | startTime:String, 175 | searchKeywords:String, 176 | clickCategoryIds:String) 177 | 178 | /** 179 | * Session随机抽取详细表 180 | * 181 | * @param taskid 当前计算批次的ID 182 | * @param userid 用户的ID 183 | * @param sessionid Session的ID 184 | * @param pageid 某个页面的ID 185 | * @param actionTime 点击行为的时间点 186 | * @param searchKeyword 用户搜索的关键词 187 | * @param clickCategoryId 某一个商品品类的ID 188 | * @param clickProductId 某一个商品的ID 189 | * @param orderCategoryIds 一次订单中所有品类的ID集合 190 | * @param orderProductIds 一次订单中所有商品的ID集合 191 | * @param payCategoryIds 一次支付中所有品类的ID集合 192 | * @param payProductIds 一次支付中所有商品的ID集合 193 | **/ 194 | case class SessionDetail(taskid:String, 195 | userid:Long, 196 | sessionid:String, 197 | pageid:Long, 198 | actionTime:String, 199 | searchKeyword:String, 200 | clickCategoryId:Long, 201 | clickProductId:Long, 202 | orderCategoryIds:String, 203 | orderProductIds:String, 204 | payCategoryIds:String, 205 | payProductIds:String) 206 | 207 | /** 208 | * 品类Top10表 209 | * @param taskid 210 | * @param categoryid 211 | * @param clickCount 212 | * @param orderCount 213 | * @param payCount 214 | */ 215 | case class Top10Category(taskid:String, 216 | categoryid:Long, 217 | clickCount:Long, 218 | orderCount:Long, 219 | payCount:Long) 220 | 221 | /** 222 | * Top10 Session 223 | * @param taskid 224 | * @param categoryid 225 | * @param sessionid 226 | * @param clickCount 227 | */ 228 | case class Top10Session(taskid:String, 229 | categoryid:Long, 230 | sessionid:String, 231 | clickCount:Long) 232 | 
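The case classes above are shared across the modules: the DAOs in JdbcHelper receive them as plain rows, and they can equally back typed Spark SQL Datasets. As a minimal sketch — not part of the repository, with DataModelSketch and the sample values invented for illustration, assuming only that the commons module and spark-sql 2.4.x are on the classpath — this is how one of them becomes a Dataset and a temp view:

import commons.model.AdProvinceTop3
import org.apache.spark.sql.SparkSession

object DataModelSketch {
  def main(args: Array[String]): Unit = {
    // Local SparkSession, just for demonstration
    val spark = SparkSession.builder().appName("DataModelSketch").master("local[*]").getOrCreate()
    import spark.implicits._

    // Any of the case classes in DataModel.scala can back a typed Dataset
    val top3 = Seq(
      AdProvinceTop3("20200526", "Hubei", 1L, 120L),
      AdProvinceTop3("20200526", "Hubei", 2L, 95L)
    ).toDS()

    // Register a temp view and query it with Spark SQL, similar to what advertStat
    // does for the per-province top-3 requirement
    top3.createOrReplaceTempView("ad_province_top3_demo")
    spark.sql("SELECT province, adid, clickCount FROM ad_province_top3_demo ORDER BY clickCount DESC").show()

    spark.stop()
  }
}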
-------------------------------------------------------------------------------- /commons/src/main/java/commons/pool/PooledMySqlClientFactory.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018. Atguigu Inc. All Rights Reserved. 3 | */ 4 | 5 | package commons.pool 6 | 7 | import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet} 8 | 9 | import commons.conf.ConfigurationManager 10 | import commons.constant.Constants 11 | import org.apache.commons.pool2.impl.{DefaultPooledObject, GenericObjectPool, GenericObjectPoolConfig} 12 | import org.apache.commons.pool2.{BasePooledObjectFactory, PooledObject} 13 | 14 | // 创建用于处理MySQL查询结果的类的抽象接口 15 | trait QueryCallback { 16 | def process(rs: ResultSet) 17 | } 18 | 19 | /** 20 | * MySQL客户端代理对象 21 | * 22 | * @param jdbcUrl MySQL URL 23 | * @param jdbcUser MySQL 用户 24 | * @param jdbcPassword MySQL 密码 25 | * @param client 默认客户端实现 26 | */ 27 | case class MySqlProxy(jdbcUrl: String, jdbcUser: String, jdbcPassword: String, client: Option[Connection] = None) { 28 | 29 | // 获取客户端连接对象 30 | private val mysqlClient = client getOrElse { 31 | DriverManager.getConnection(jdbcUrl, jdbcUser, jdbcPassword) 32 | } 33 | 34 | /** 35 | * 执行增删改SQL语句 36 | * 37 | * @param sql 38 | * @param params 39 | * @return 影响的行数 40 | */ 41 | def executeUpdate(sql: String, params: Array[Any]): Int = { 42 | var rtn = 0 43 | var pstmt: PreparedStatement = null 44 | 45 | try { 46 | // 第一步:关闭自动提交 47 | mysqlClient.setAutoCommit(false) 48 | // 第二步:根据传入的sql语句创建prepareStatement 49 | pstmt = mysqlClient.prepareStatement(sql) 50 | 51 | // 第三步:为prepareStatement中的每个参数填写数值 52 | if (params != null && params.length > 0) { 53 | for (i <- 0 until params.length) { 54 | pstmt.setObject(i + 1, params(i)) 55 | } 56 | } 57 | // 第四步:执行增删改操作 58 | rtn = pstmt.executeUpdate() 59 | // 第五步:手动提交 60 | mysqlClient.commit() 61 | } catch { 62 | case e: Exception => e.printStackTrace 63 | } 64 | rtn 65 | } 66 | 67 | /** 68 | * 执行查询SQL语句 69 | * 70 | * @param sql 71 | * @param params 72 | */ 73 | def executeQuery(sql: String, params: Array[Any], queryCallback: QueryCallback) { 74 | var pstmt: PreparedStatement = null 75 | var rs: ResultSet = null 76 | 77 | try { 78 | // 第一步:根据传入的sql语句创建prepareStatement 79 | pstmt = mysqlClient.prepareStatement(sql) 80 | 81 | // 第二步:为prepareStatement中的每个参数填写数值 82 | if (params != null && params.length > 0) { 83 | for (i <- 0 until params.length) { 84 | pstmt.setObject(i + 1, params(i)) 85 | } 86 | } 87 | 88 | // 第三步:执行查询操作 89 | rs = pstmt.executeQuery() 90 | // 第四步:处理查询后的结果 91 | queryCallback.process(rs) 92 | } catch { 93 | case e: Exception => e.printStackTrace 94 | } 95 | } 96 | 97 | /** 98 | * 批量执行SQL语句 99 | * 100 | * @param sql 101 | * @param paramsList 102 | * @return 每条SQL语句影响的行数 103 | */ 104 | def executeBatch(sql: String, paramsList: Array[Array[Any]]): Array[Int] = { 105 | var rtn: Array[Int] = null 106 | var pstmt: PreparedStatement = null 107 | try { 108 | // 第一步:关闭自动提交 109 | mysqlClient.setAutoCommit(false) 110 | pstmt = mysqlClient.prepareStatement(sql) 111 | 112 | // 第二步:为prepareStatement中的每个参数填写数值 113 | if (paramsList != null && paramsList.length > 0) { 114 | for (params <- paramsList) { 115 | for (i <- 0 until params.length) { 116 | pstmt.setObject(i + 1, params(i)) 117 | } 118 | pstmt.addBatch() 119 | } 120 | } 121 | 122 | // 第三步:执行批量的SQL语句 123 | rtn = pstmt.executeBatch() 124 | 125 | // 第四步:手动提交 126 | mysqlClient.commit() 127 | } catch { 128 | case e: Exception => e.printStackTrace 129 | } 130 
| rtn 131 | } 132 | def executeBatch1(sql:String): Unit ={ 133 | val pstmt = mysqlClient.prepareStatement(sql) 134 | pstmt.execute(); 135 | println("succcess-=-----------------") 136 | } 137 | 138 | // 关闭MySQL客户端 139 | def shutdown(): Unit = mysqlClient.close() 140 | } 141 | 142 | /** 143 | * 将MySqlProxy实例视为对象,MySqlProxy实例的创建使用对象池进行维护 144 | */ 145 | 146 | /** 147 | * 创建自定义工厂类,继承BasePooledObjectFactory工厂类,负责对象的创建、包装和销毁 148 | * @param jdbcUrl 149 | * @param jdbcUser 150 | * @param jdbcPassword 151 | * @param client 152 | */ 153 | class PooledMySqlClientFactory(jdbcUrl: String, jdbcUser: String, jdbcPassword: String, client: Option[Connection] = None) extends BasePooledObjectFactory[MySqlProxy] with Serializable { 154 | 155 | // 用于池来创建对象 156 | override def create(): MySqlProxy = MySqlProxy(jdbcUrl, jdbcUser, jdbcPassword, client) 157 | 158 | // 用于池来包装对象 159 | override def wrap(obj: MySqlProxy): PooledObject[MySqlProxy] = new DefaultPooledObject(obj) 160 | 161 | // 用于池来销毁对象 162 | override def destroyObject(p: PooledObject[MySqlProxy]): Unit = { 163 | p.getObject.shutdown() 164 | super.destroyObject(p) 165 | } 166 | 167 | } 168 | 169 | /** 170 | * 创建MySQL池工具类 171 | */ 172 | object CreateMySqlPool { 173 | 174 | // 加载JDBC驱动,只需要一次 175 | Class.forName("com.mysql.cj.jdbc.Driver") 176 | 177 | // 在org.apache.commons.pool2.impl中预设了三个可以直接使用的对象池:GenericObjectPool、GenericKeyedObjectPool和SoftReferenceObjectPool 178 | // 创建genericObjectPool为GenericObjectPool 179 | // GenericObjectPool的特点是可以设置对象池中的对象特征,包括LIFO方式、最大空闲数、最小空闲数、是否有效性检查等等 180 | private var genericObjectPool: GenericObjectPool[MySqlProxy] = null 181 | 182 | // 伴生对象通过apply完成对象的创建 183 | def apply(): GenericObjectPool[MySqlProxy] = { 184 | // 单例模式 185 | if (this.genericObjectPool == null) { 186 | this.synchronized { 187 | // 获取MySQL配置参数 188 | val jdbcUrl = ConfigurationManager.config.getString(Constants.JDBC_URL) 189 | val jdbcUser = ConfigurationManager.config.getString(Constants.JDBC_USER) 190 | val jdbcPassword = ConfigurationManager.config.getString(Constants.JDBC_PASSWORD) 191 | val size = ConfigurationManager.config.getInt(Constants.JDBC_DATASOURCE_SIZE) 192 | 193 | val pooledFactory = new PooledMySqlClientFactory(jdbcUrl, jdbcUser, jdbcPassword) 194 | val poolConfig = { 195 | // 创建标准对象池配置类的实例 196 | val c = new GenericObjectPoolConfig 197 | // 设置配置对象参数 198 | // 设置最大对象数 199 | c.setMaxTotal(size) 200 | // 设置最大空闲对象数 201 | c.setMaxIdle(size) 202 | c 203 | } 204 | // 对象池的创建需要工厂类和配置类 205 | // 返回一个GenericObjectPool对象池 206 | this.genericObjectPool = new GenericObjectPool[MySqlProxy](pooledFactory, poolConfig) 207 | } 208 | } 209 | genericObjectPool 210 | } 211 | } 212 | 213 | -------------------------------------------------------------------------------- /commons/src/main/java/commons/utils/Utils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018. Atguigu Inc. All Rights Reserved. 
3 | */ 4 | 5 | package commons.utils 6 | 7 | import java.text.SimpleDateFormat 8 | import java.util.{Calendar, Date} 9 | 10 | 11 | import org.joda.time.DateTime 12 | import org.joda.time.format.DateTimeFormat 13 | 14 | import scala.collection.mutable 15 | 16 | /** 17 | * 日期时间工具类 18 | * 使用Joda实现,使用Java提供的Date会存在线程安全问题 19 | */ 20 | object DateUtils { 21 | 22 | val TIME_FORMAT = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss") 23 | val DATE_FORMAT = DateTimeFormat.forPattern("yyyy-MM-dd") 24 | val DATEKEY_FORMAT = DateTimeFormat.forPattern("yyyyMMdd") 25 | val DATE_TIME_FORMAT = DateTimeFormat.forPattern("yyyyMMddHHmm") 26 | 27 | /** 28 | * 判断一个时间是否在另一个时间之前 29 | * @param time1 第一个时间 30 | * @param time2 第二个时间 31 | * @return 判断结果 32 | */ 33 | def before(time1:String, time2:String):Boolean = { 34 | if(TIME_FORMAT.parseDateTime(time1).isBefore(TIME_FORMAT.parseDateTime(time2))) { 35 | return true 36 | } 37 | false 38 | } 39 | 40 | /** 41 | * 判断一个时间是否在另一个时间之后 42 | * @param time1 第一个时间 43 | * @param time2 第二个时间 44 | * @return 判断结果 45 | */ 46 | def after(time1:String, time2:String):Boolean = { 47 | if(TIME_FORMAT.parseDateTime(time1).isAfter(TIME_FORMAT.parseDateTime(time2))) { 48 | return true 49 | } 50 | false 51 | } 52 | 53 | /** 54 | * 计算时间差值(单位为秒) 55 | * @param time1 时间1 56 | * @param time2 时间2 57 | * @return 差值 58 | */ 59 | def minus(time1:String, time2:String): Int = { 60 | return (TIME_FORMAT.parseDateTime(time1).getMillis - TIME_FORMAT.parseDateTime(time2).getMillis)/1000 toInt 61 | } 62 | 63 | /** 64 | * 获取年月日和小时 65 | * @param datetime 时间(yyyy-MM-dd HH:mm:ss) 66 | * @return 结果(yyyy-MM-dd_HH) 67 | */ 68 | def getDateHour(datetime:String):String = { 69 | val date = datetime.split(" ")(0) 70 | val hourMinuteSecond = datetime.split(" ")(1) 71 | val hour = hourMinuteSecond.split(":")(0) 72 | date + "_" + hour 73 | } 74 | 75 | /** 76 | * 获取当天日期(yyyy-MM-dd) 77 | * @return 当天日期 78 | */ 79 | def getTodayDate():String = { 80 | DateTime.now().toString(DATE_FORMAT) 81 | } 82 | 83 | /** 84 | * 获取昨天的日期(yyyy-MM-dd) 85 | * @return 昨天的日期 86 | */ 87 | def getYesterdayDate():String = { 88 | DateTime.now().minusDays(1).toString(DATE_FORMAT) 89 | } 90 | 91 | /** 92 | * 格式化日期(yyyy-MM-dd) 93 | * @param date Date对象 94 | * @return 格式化后的日期 95 | */ 96 | def formatDate(date:Date):String = { 97 | new DateTime(date).toString(DATE_FORMAT) 98 | } 99 | 100 | /** 101 | * 格式化时间(yyyy-MM-dd HH:mm:ss) 102 | * @param date Date对象 103 | * @return 格式化后的时间 104 | */ 105 | def formatTime(date:Date):String = { 106 | new DateTime(date).toString(TIME_FORMAT) 107 | } 108 | 109 | /** 110 | * 解析时间字符串 111 | * @param time 时间字符串 112 | * @return Date 113 | */ 114 | def parseTime(time:String):Date = { 115 | TIME_FORMAT.parseDateTime(time).toDate 116 | } 117 | 118 | def main(args: Array[String]): Unit = { 119 | print(DateUtils.parseTime("2017-10-31 20:27:53")) 120 | } 121 | 122 | /** 123 | * 格式化日期key 124 | * @param date 125 | * @return 126 | */ 127 | def formatDateKey(date:Date):String = { 128 | new DateTime(date).toString(DATEKEY_FORMAT) 129 | } 130 | 131 | /** 132 | * 格式化日期key 133 | * @return 134 | */ 135 | def parseDateKey(datekey: String ):Date = { 136 | DATEKEY_FORMAT.parseDateTime(datekey).toDate 137 | } 138 | 139 | /** 140 | * 格式化时间,保留到分钟级别 141 | * yyyyMMddHHmm 142 | * @param date 143 | * @return 144 | */ 145 | def formatTimeMinute(date: Date):String = { 146 | new DateTime(date).toString(DATE_TIME_FORMAT) 147 | } 148 | 149 | } 150 | 151 | 152 | 153 | object ParamUtils{ 154 | def getPageFlow(): Array[String] ={ 155 | var 
z=Array(1,2,3,4,5,6,7); 156 | val r=z.slice(0,z.length-1).zip(z.tail).map{ 157 | case (p1,p2)=>{ 158 | p1+"-"+p2; 159 | } 160 | } 161 | r; 162 | } 163 | } 164 | /** 165 | * 数字格工具类 166 | * 167 | * 168 | */ 169 | object NumberUtils { 170 | 171 | /** 172 | * 格式化小数 173 | * @param scale 四舍五入的位数 174 | * @return 格式化小数 175 | */ 176 | def formatDouble(num:Double, scale:Int):Double = { 177 | val bd = BigDecimal(num) 178 | bd.setScale(scale, BigDecimal.RoundingMode.HALF_UP).doubleValue() 179 | } 180 | 181 | } 182 | 183 | 184 | 185 | 186 | /** 187 | * 字符串工具类 188 | * 189 | */ 190 | object StringUtil { 191 | 192 | /** 193 | * 判断字符串是否为空 194 | * @param str 字符串 195 | * @return 是否为空 196 | */ 197 | def isEmpty(str:String):Boolean = { 198 | str == null || "".equals(str) 199 | } 200 | 201 | /** 202 | * 判断字符串是否不为空 203 | * @param str 字符串 204 | * @return 是否不为空 205 | */ 206 | def isNotEmpty(str:String):Boolean = { 207 | str != null && !"".equals(str) 208 | } 209 | 210 | /** 211 | * 截断字符串两侧的逗号 212 | * @param str 字符串 213 | * @return 字符串 214 | */ 215 | def trimComma(str:String):String = { 216 | var result = "" 217 | if(str.startsWith(",")) { 218 | result = str.substring(1) 219 | } 220 | if(str.endsWith(",")) { 221 | result = str.substring(0, str.length() - 1) 222 | } 223 | result 224 | } 225 | 226 | /** 227 | * 补全两位数字 228 | * @param str 229 | * @return 230 | */ 231 | def fulfuill(str: String):String = { 232 | if(str.length() == 2) { 233 | str 234 | } else { 235 | "0" + str 236 | } 237 | } 238 | 239 | /** 240 | * 从拼接的字符串中提取字段 241 | * @param str 字符串 242 | * @param delimiter 分隔符 243 | * @param field 字段 244 | * @return 字段值 245 | */ 246 | def getFieldFromConcatString(str:String, delimiter:String, field:String):String = { 247 | try { 248 | val fields = str.split(delimiter); 249 | for(concatField <- fields) { 250 | if(concatField.split("=").length == 2) { 251 | val fieldName = concatField.split("=")(0) 252 | val fieldValue = concatField.split("=")(1) 253 | if(fieldName.equals(field)) { 254 | return fieldValue 255 | } 256 | } 257 | } 258 | } catch{ 259 | case e:Exception => e.printStackTrace() 260 | } 261 | null 262 | } 263 | 264 | /** 265 | * 从拼接的字符串中给字段设置值 266 | * @param str 字符串 267 | * @param delimiter 分隔符 268 | * @param field 字段名 269 | * @param newFieldValue 新的field值 270 | * @return 字段值 271 | */ 272 | def setFieldInConcatString(str:String, delimiter:String, field:String, newFieldValue:String):String = { 273 | 274 | val fieldsMap = new mutable.HashMap[String,String]() 275 | 276 | for(fileds <- str.split(delimiter)){ 277 | var arra = fileds.split("=") 278 | if(arra(0).compareTo(field) == 0) 279 | fieldsMap += (field -> newFieldValue) 280 | else 281 | fieldsMap += (arra(0) -> arra(1)) 282 | } 283 | fieldsMap.map(item=> item._1 + "=" + item._2).mkString(delimiter) 284 | } 285 | 286 | } 287 | 288 | 289 | /** 290 | * 校验工具类 291 | * 292 | */ 293 | object ValidUtils { 294 | 295 | /** 296 | * 校验数据中的指定字段,是否在指定范围内 297 | * @param data 数据 298 | * @param dataField 数据字段 299 | * @param parameter 参数 300 | * @param startParamField 起始参数字段 301 | * @param endParamField 结束参数字段 302 | * @return 校验结果 303 | */ 304 | def between(data:String, dataField:String, parameter:String, startParamField:String, endParamField:String):Boolean = { 305 | 306 | val startParamFieldStr = StringUtil.getFieldFromConcatString(parameter, "\\|", startParamField) 307 | val endParamFieldStr = StringUtil.getFieldFromConcatString(parameter, "\\|", endParamField) 308 | if(startParamFieldStr == null || endParamFieldStr == null) { 309 | return true 310 | } 311 | 312 | val 
startParamFieldValue = startParamFieldStr.toInt 313 | val endParamFieldValue = endParamFieldStr.toInt 314 | 315 | val dataFieldStr = StringUtil.getFieldFromConcatString(data, "\\|", dataField) 316 | if(dataFieldStr != null) { 317 | val dataFieldValue = dataFieldStr.toInt 318 | if(dataFieldValue >= startParamFieldValue && dataFieldValue <= endParamFieldValue) { 319 | return true 320 | } else { 321 | return false 322 | } 323 | } 324 | false 325 | } 326 | 327 | /** 328 | * 校验数据中的指定字段,是否有值与参数字段的值相同 329 | * @param data 数据 330 | * @param dataField 数据字段 331 | * @param parameter 参数 332 | * @param paramField 参数字段 333 | * @return 校验结果 334 | */ 335 | def in(data:String, dataField:String, parameter:String, paramField:String):Boolean = { 336 | val paramFieldValue = StringUtil.getFieldFromConcatString(parameter, "\\|", paramField) 337 | if(paramFieldValue == null) { 338 | return true 339 | } 340 | val paramFieldValueSplited = paramFieldValue.split(",") 341 | 342 | val dataFieldValue = StringUtil.getFieldFromConcatString(data, "\\|", dataField) 343 | if(dataFieldValue != null && dataFieldValue != "-1") { 344 | val dataFieldValueSplited = dataFieldValue.split(",") 345 | 346 | for(singleDataFieldValue <- dataFieldValueSplited) { 347 | for(singleParamFieldValue <- paramFieldValueSplited) { 348 | if(singleDataFieldValue.compareTo(singleParamFieldValue) ==0) { 349 | return true 350 | } 351 | } 352 | } 353 | } 354 | false 355 | } 356 | 357 | /** 358 | * 校验数据中的指定字段,是否在指定范围内 359 | * @param data 数据 360 | * @param dataField 数据字段 361 | * @param parameter 参数 362 | * @param paramField 参数字段 363 | * @return 校验结果 364 | */ 365 | def equal(data:String, dataField:String, parameter:String, paramField:String):Boolean = { 366 | val paramFieldValue = StringUtil.getFieldFromConcatString(parameter, "\\|", paramField) 367 | if(paramFieldValue == null) { 368 | return true 369 | } 370 | 371 | val dataFieldValue = StringUtil.getFieldFromConcatString(data, "\\|", dataField) 372 | if(dataFieldValue != null) { 373 | if(dataFieldValue.compareTo(paramFieldValue) == 0) { 374 | return true 375 | } 376 | } 377 | false 378 | } 379 | 380 | } -------------------------------------------------------------------------------- /commons/src/main/resources/commerce.properties: -------------------------------------------------------------------------------- 1 | 2 | # jbdc配置 3 | jdbc.datasource.size=10 4 | jdbc.url=jdbc:mysql://localhost:3306/commerce?useUnicode=true&characterEncoding=utf8&serverTimezone=UTC 5 | jdbc.user=root 6 | jdbc.password=123456 7 | 8 | # 指定分析的用户范围 9 | # 可以使用的属性如下: 10 | # startDate: 格式: yyyy-MM-DD [必选] 11 | # endDate: 格式: yyyy-MM-DD [必选] 12 | # startAge: 范围: 0 - 59 13 | # endAge: 范围: 0 - 59 14 | # professionals: 范围:professionals[0 - 59] 15 | # cities: 0 - 9 ((0,"北京","华北"),(1,"上海","华东"),(2,"南京","华东"),(3,"广州","华南"),(4,"三亚","华南"),(5,"武汉","华中"),(6,"长沙","华中"),(7,"西安","西北"),(8,"成都","西南"),(9,"哈尔滨","东北")) 16 | # sex: 范围: 0 - 1 17 | # keywords: 范围: ("火锅", "蛋糕", "重庆辣子鸡", "重庆小面", "呷哺呷哺", "新辣道鱼火锅", "国贸大厦", "太古商场", "日本料理", "温泉") 18 | # categoryIds:0 - 99,以逗号分隔 19 | # targetPageFlow: 0 - 99, 以逗号分隔 20 | task.params.json={startDate:"2020-05-21", \ 21 | endDate:"2020-05-24", \ 22 | startAge: 20, \ 23 | endAge: 50, \ 24 | professionals: "", \ 25 | cities: "", \ 26 | sex:"", \ 27 | keywords:"", \ 28 | categoryIds:"", \ 29 | targetPageFlow:"1,2,3,4,5,6,7"} 30 | 31 | # Kafka配置 32 | kafka.broker.list=121.199.16.65:9092 33 | kafka.topics=AdRealTimeLog1 -------------------------------------------------------------------------------- 
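A minimal sketch of how these settings are read at runtime, in the same way CreateMySqlPool and the page-conversion jobs do. ConfigurationManager, Constants and the property keys are the project's own; the object name ConfigUsageDemo is made up here for illustration only.

import commons.conf.ConfigurationManager
import commons.constant.Constants
import net.sf.json.JSONObject

object ConfigUsageDemo {
  def main(args: Array[String]): Unit = {
    // JDBC settings are looked up through keys defined in Constants (see CreateMySqlPool)
    val jdbcUrl  = ConfigurationManager.config.getString(Constants.JDBC_URL)
    val poolSize = ConfigurationManager.config.getInt(Constants.JDBC_DATASOURCE_SIZE)

    // task.params.json is kept as a single JSON string and parsed with json-lib,
    // exactly as PageConvertStat / PageStat do
    val taskParam = JSONObject.fromObject(
      ConfigurationManager.config.getString(Constants.TASK_PARAMS))
    val targetPageFlow = taskParam.getString("targetPageFlow") // e.g. "1,2,3,4,5,6,7"

    println(s"jdbc.url=$jdbcUrl, pool size=$poolSize, targetPageFlow=$targetPageFlow")
  }
}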
/commons/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=info, stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n 5 | 6 | log4j.appender.R=org.apache.log4j.RollingFileAppender 7 | log4j.appender.R.File=../log/agent.log 8 | log4j.appender.R.MaxFileSize=1024KB 9 | log4j.appender.R.MaxBackupIndex=1 10 | 11 | log4j.appender.R.layout=org.apache.log4j.PatternLayout 12 | log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%6L) : %m%n -------------------------------------------------------------------------------- /commons/target/classes/commerce.properties: -------------------------------------------------------------------------------- 1 | 2 | # jbdc配置 3 | jdbc.datasource.size=10 4 | jdbc.url=jdbc:mysql://localhost:3306/commerce?useUnicode=true&characterEncoding=utf8&serverTimezone=UTC 5 | jdbc.user=root 6 | jdbc.password=123456 7 | 8 | # 指定分析的用户范围 9 | # 可以使用的属性如下: 10 | # startDate: 格式: yyyy-MM-DD [必选] 11 | # endDate: 格式: yyyy-MM-DD [必选] 12 | # startAge: 范围: 0 - 59 13 | # endAge: 范围: 0 - 59 14 | # professionals: 范围:professionals[0 - 59] 15 | # cities: 0 - 9 ((0,"北京","华北"),(1,"上海","华东"),(2,"南京","华东"),(3,"广州","华南"),(4,"三亚","华南"),(5,"武汉","华中"),(6,"长沙","华中"),(7,"西安","西北"),(8,"成都","西南"),(9,"哈尔滨","东北")) 16 | # sex: 范围: 0 - 1 17 | # keywords: 范围: ("火锅", "蛋糕", "重庆辣子鸡", "重庆小面", "呷哺呷哺", "新辣道鱼火锅", "国贸大厦", "太古商场", "日本料理", "温泉") 18 | # categoryIds:0 - 99,以逗号分隔 19 | # targetPageFlow: 0 - 99, 以逗号分隔 20 | task.params.json={startDate:"2020-05-21", \ 21 | endDate:"2020-05-24", \ 22 | startAge: 20, \ 23 | endAge: 50, \ 24 | professionals: "", \ 25 | cities: "", \ 26 | sex:"", \ 27 | keywords:"", \ 28 | categoryIds:"", \ 29 | targetPageFlow:"1,2,3,4,5,6,7"} 30 | 31 | # Kafka配置 32 | kafka.broker.list=121.199.16.65:9092 33 | kafka.topics=AdRealTimeLog1 -------------------------------------------------------------------------------- /commons/target/classes/commons/conf/ConfigurationManager$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/conf/ConfigurationManager$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/conf/ConfigurationManager.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/conf/ConfigurationManager.class -------------------------------------------------------------------------------- /commons/target/classes/commons/constant/Constants$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/constant/Constants$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/constant/Constants.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/constant/Constants.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdBlacklist$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdBlacklist$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdBlacklist.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdBlacklist.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdClickTrend$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdClickTrend$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdClickTrend.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdClickTrend.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdProvinceTop3$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdProvinceTop3$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdProvinceTop3.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdProvinceTop3.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdStat$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdStat$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdStat.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdStat.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdUserClickCount$.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdUserClickCount$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/AdUserClickCount.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/AdUserClickCount.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/ProductInfo$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/ProductInfo$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/ProductInfo.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/ProductInfo.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/SessionAggrStat$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/SessionAggrStat$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/SessionAggrStat.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/SessionAggrStat.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/SessionDetail$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/SessionDetail$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/SessionDetail.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/SessionDetail.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/SessionRandomExtract$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/SessionRandomExtract$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/SessionRandomExtract.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/SessionRandomExtract.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/Top10Category$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/Top10Category$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/Top10Category.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/Top10Category.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/Top10Session$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/Top10Session$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/Top10Session.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/Top10Session.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/UserInfo$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/UserInfo$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/UserInfo.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/UserInfo.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/UserVisitAction$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/UserVisitAction$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/model/UserVisitAction.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/model/UserVisitAction.class -------------------------------------------------------------------------------- /commons/target/classes/commons/pool/CreateMySqlPool$.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/pool/CreateMySqlPool$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/pool/CreateMySqlPool.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/pool/CreateMySqlPool.class -------------------------------------------------------------------------------- /commons/target/classes/commons/pool/MySqlProxy$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/pool/MySqlProxy$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/pool/MySqlProxy.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/pool/MySqlProxy.class -------------------------------------------------------------------------------- /commons/target/classes/commons/pool/PooledMySqlClientFactory$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/pool/PooledMySqlClientFactory$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/pool/PooledMySqlClientFactory.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/pool/PooledMySqlClientFactory.class -------------------------------------------------------------------------------- /commons/target/classes/commons/pool/QueryCallback.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/pool/QueryCallback.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/DateUtils$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/DateUtils$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/DateUtils.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/DateUtils.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/NumberUtils$.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/NumberUtils$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/NumberUtils.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/NumberUtils.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/ParamUtils$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/ParamUtils$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/ParamUtils.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/ParamUtils.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/StringUtil$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/StringUtil$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/StringUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/StringUtil.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/ValidUtils$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/ValidUtils$.class -------------------------------------------------------------------------------- /commons/target/classes/commons/utils/ValidUtils.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/commons/target/classes/commons/utils/ValidUtils.class -------------------------------------------------------------------------------- /commons/target/classes/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=info, stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n 5 | 6 | log4j.appender.R=org.apache.log4j.RollingFileAppender 7 | log4j.appender.R.File=../log/agent.log 8 | log4j.appender.R.MaxFileSize=1024KB 9 | log4j.appender.R.MaxBackupIndex=1 10 | 11 | log4j.appender.R.layout=org.apache.log4j.PatternLayout 12 | 
log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%6L) : %m%n -------------------------------------------------------------------------------- /commons/target/classes/test/DataModel.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017. Atguigu Inc. All Rights Reserved. 3 | * Date: 10/29/17 11:14 AM. 4 | * Author: wuyufei. 5 | */ 6 | 7 | /** 8 | * 广告黑名单 9 | * 10 | */ 11 | case class AdBlacklist(userid:Long) 12 | 13 | /** 14 | * 用户广告点击量 15 | * @author wuyufei 16 | * 17 | */ 18 | case class AdUserClickCount(date:String, 19 | userid:Long, 20 | adid:Long, 21 | clickCount:Long) 22 | 23 | 24 | /** 25 | * 广告实时统计 26 | * 27 | */ 28 | case class AdStat(date:String, 29 | province:String, 30 | city:String, 31 | adid:Long, 32 | clickCount:Long) 33 | 34 | /** 35 | * 各省top3热门广告 36 | * 37 | */ 38 | case class AdProvinceTop3(date:String, 39 | province:String, 40 | adid:Long, 41 | clickCount:Long) 42 | 43 | /** 44 | * 广告点击趋势 45 | * 46 | */ 47 | case class AdClickTrend(date:String, 48 | hour:String, 49 | minute:String, 50 | adid:Long, 51 | clickCount:Long) -------------------------------------------------------------------------------- /commons/target/classes/test/JdbcHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017. Atguigu Inc. All Rights Reserved. 3 | * Date: 11/1/17 3:40 PM. 4 | * Author: wuyufei. 5 | */ 6 | 7 | import java.sql.ResultSet 8 | 9 | import commons.pool.{CreateMySqlPool, QueryCallback} 10 | 11 | import scala.collection.mutable.ArrayBuffer 12 | 13 | /** 14 | * 用户黑名单DAO类 15 | */ 16 | object AdBlacklistDAO { 17 | 18 | /** 19 | * 批量插入广告黑名单用户 20 | * 21 | * @param adBlacklists 22 | */ 23 | def insertBatch(adBlacklists: Array[AdBlacklist]) { 24 | // 批量插入 25 | val sql = "INSERT INTO ad_blacklist VALUES(?)" 26 | 27 | val paramsList = new ArrayBuffer[Array[Any]]() 28 | 29 | // 向paramsList添加userId 30 | for (adBlacklist <- adBlacklists) { 31 | val params: Array[Any] = Array(adBlacklist.userid) 32 | paramsList += params 33 | } 34 | // 获取对象池单例对象 35 | val mySqlPool = CreateMySqlPool() 36 | // 从对象池中提取对象 37 | val client = mySqlPool.borrowObject() 38 | 39 | // 执行批量插入操作 40 | client.executeBatch(sql, paramsList.toArray) 41 | // 使用完成后将对象返回给对象池 42 | mySqlPool.returnObject(client) 43 | } 44 | 45 | /** 46 | * 查询所有广告黑名单用户 47 | * 48 | * @return 49 | */ 50 | def findAll(): Array[AdBlacklist] = { 51 | // 将黑名单中的所有数据查询出来 52 | val sql = "SELECT * FROM ad_blacklist" 53 | 54 | val adBlacklists = new ArrayBuffer[AdBlacklist]() 55 | 56 | // 获取对象池单例对象 57 | val mySqlPool = CreateMySqlPool() 58 | // 从对象池中提取对象 59 | val client = mySqlPool.borrowObject() 60 | 61 | // 执行sql查询并且通过处理函数将所有的userid加入array中 62 | client.executeQuery(sql, null, new QueryCallback { 63 | override def process(rs: ResultSet): Unit = { 64 | while (rs.next()) { 65 | val userid = rs.getInt(1).toLong 66 | adBlacklists += AdBlacklist(userid) 67 | } 68 | } 69 | }) 70 | 71 | // 使用完成后将对象返回给对象池 72 | mySqlPool.returnObject(client) 73 | adBlacklists.toArray 74 | } 75 | } 76 | 77 | 78 | /** 79 | * 用户广告点击量DAO实现类 80 | * 81 | */ 82 | object AdUserClickCountDAO { 83 | 84 | def updateBatch(adUserClickCounts: Array[AdUserClickCount]) { 85 | // 获取对象池单例对象 86 | val mySqlPool = CreateMySqlPool() 87 | // 从对象池中提取对象 88 | val client = mySqlPool.borrowObject() 89 | 90 | // 首先对用户广告点击量进行分类,分成待插入的和待更新的 91 | val insertAdUserClickCounts = ArrayBuffer[AdUserClickCount]() 92 | val updateAdUserClickCounts = 
ArrayBuffer[AdUserClickCount]() 93 | 94 | val selectSQL = "SELECT count(*) FROM ad_user_click_count WHERE date=? AND userid=? AND adid=? " 95 | 96 | for (adUserClickCount <- adUserClickCounts) { 97 | 98 | val selectParams: Array[Any] = Array(adUserClickCount.date, adUserClickCount.userid, adUserClickCount.adid) 99 | // 根据传入的用户点击次数统计数据从已有的ad_user_click_count中进行查询 100 | client.executeQuery(selectSQL, selectParams, new QueryCallback { 101 | override def process(rs: ResultSet): Unit = { 102 | // 如果能查询到并且点击次数大于0,则认为是待更新项 103 | if (rs.next() && rs.getInt(1) > 0) { 104 | updateAdUserClickCounts += adUserClickCount 105 | } else { 106 | insertAdUserClickCounts += adUserClickCount 107 | } 108 | } 109 | }) 110 | } 111 | 112 | // 执行批量插入 113 | val insertSQL = "INSERT INTO ad_user_click_count VALUES(?,?,?,?)" 114 | val insertParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 115 | 116 | // 将待插入项全部加入到参数列表中 117 | for (adUserClickCount <- insertAdUserClickCounts) { 118 | insertParamsList += Array[Any](adUserClickCount.date, adUserClickCount.userid, adUserClickCount.adid, adUserClickCount.clickCount) 119 | } 120 | 121 | // 执行批量插入 122 | client.executeBatch(insertSQL, insertParamsList.toArray) 123 | 124 | // 执行批量更新 125 | // clickCount=clickCount + :此处的UPDATE是进行累加 126 | val updateSQL = "UPDATE ad_user_click_count SET clickCount=clickCount + ? WHERE date=? AND userid=? AND adid=?" 127 | val updateParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 128 | 129 | // 将待更新项全部加入到参数列表中 130 | for (adUserClickCount <- updateAdUserClickCounts) { 131 | updateParamsList += Array[Any](adUserClickCount.clickCount, adUserClickCount.date, adUserClickCount.userid, adUserClickCount.adid) 132 | } 133 | 134 | // 执行批量更新 135 | client.executeBatch(updateSQL, updateParamsList.toArray) 136 | 137 | // 使用完成后将对象返回给对象池 138 | mySqlPool.returnObject(client) 139 | } 140 | 141 | /** 142 | * 根据多个key查询用户广告点击量 143 | * 144 | * @param date 日期 145 | * @param userid 用户id 146 | * @param adid 广告id 147 | * @return 148 | */ 149 | def findClickCountByMultiKey(date: String, userid: Long, adid: Long): Int = { 150 | // 获取对象池单例对象 151 | val mySqlPool = CreateMySqlPool() 152 | // 从对象池中提取对象 153 | val client = mySqlPool.borrowObject() 154 | 155 | val sql = "SELECT clickCount FROM ad_user_click_count " + 156 | "WHERE date=? " + 157 | "AND userid=? " + 158 | "AND adid=?" 159 | 160 | var clickCount = 0 161 | val params = Array[Any](date, userid, adid) 162 | 163 | // 根据多个条件查询指定用户的点击量,将查询结果累加到clickCount中 164 | client.executeQuery(sql, params, new QueryCallback { 165 | override def process(rs: ResultSet): Unit = { 166 | if (rs.next()) { 167 | clickCount = rs.getInt(1) 168 | } 169 | } 170 | }) 171 | // 使用完成后将对象返回给对象池 172 | mySqlPool.returnObject(client) 173 | clickCount 174 | } 175 | } 176 | 177 | 178 | /** 179 | * 广告实时统计DAO实现类 180 | * 181 | * @author Administrator 182 | * 183 | */ 184 | object AdStatDAO { 185 | 186 | def updateBatch(adStats: Array[AdStat]) { 187 | // 获取对象池单例对象 188 | val mySqlPool = CreateMySqlPool() 189 | // 从对象池中提取对象 190 | val client = mySqlPool.borrowObject() 191 | 192 | 193 | // 区分开来哪些是要插入的,哪些是要更新的 194 | val insertAdStats = ArrayBuffer[AdStat]() 195 | val updateAdStats = ArrayBuffer[AdStat]() 196 | 197 | val selectSQL = "SELECT count(*) " + 198 | "FROM ad_stat " + 199 | "WHERE date=? " + 200 | "AND province=? " + 201 | "AND city=? " + 202 | "AND adid=?" 
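// The ad_stat table created in ad.sql has no primary or unique key, so the DAO has to run
// this COUNT(*) query per record to decide whether it is a new row or an existing one,
// and then issues two separate batches below. With a unique index on
// (date, province, city, adid) the same effect could be achieved in a single
// INSERT ... ON DUPLICATE KEY UPDATE statement; that would require a schema change,
// so it is only mentioned here as an alternative.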
203 | 204 | for (adStat <- adStats) { 205 | 206 | val params = Array[Any](adStat.date, adStat.province, adStat.city, adStat.adid) 207 | // 通过查询结果判断当前项时待插入还是待更新 208 | client.executeQuery(selectSQL, params, new QueryCallback { 209 | override def process(rs: ResultSet): Unit = { 210 | if (rs.next() && rs.getInt(1) > 0) { 211 | updateAdStats += adStat 212 | } else { 213 | insertAdStats += adStat 214 | } 215 | } 216 | }) 217 | } 218 | 219 | // 对于需要插入的数据,执行批量插入操作 220 | val insertSQL = "INSERT INTO ad_stat VALUES(?,?,?,?,?)" 221 | 222 | val insertParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 223 | 224 | for (adStat <- insertAdStats) { 225 | insertParamsList += Array[Any](adStat.date, adStat.province, adStat.city, adStat.adid, adStat.clickCount) 226 | } 227 | 228 | client.executeBatch(insertSQL, insertParamsList.toArray) 229 | 230 | // 对于需要更新的数据,执行批量更新操作 231 | // 此处的UPDATE是进行覆盖 232 | val updateSQL = "UPDATE ad_stat SET clickCount=? " + 233 | "WHERE date=? " + 234 | "AND province=? " + 235 | "AND city=? " + 236 | "AND adid=?" 237 | 238 | val updateParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 239 | 240 | for (adStat <- updateAdStats) { 241 | updateParamsList += Array[Any](adStat.clickCount, adStat.date, adStat.province, adStat.city, adStat.adid) 242 | } 243 | 244 | client.executeBatch(updateSQL, updateParamsList.toArray) 245 | 246 | // 使用完成后将对象返回给对象池 247 | mySqlPool.returnObject(client) 248 | } 249 | 250 | } 251 | 252 | 253 | /** 254 | * 各省份top3热门广告DAO实现类 255 | * 256 | * @author Administrator 257 | * 258 | */ 259 | object AdProvinceTop3DAO { 260 | 261 | def updateBatch(adProvinceTop3s: Array[AdProvinceTop3]) { 262 | // 获取对象池单例对象 263 | val mySqlPool = CreateMySqlPool() 264 | // 从对象池中提取对象 265 | val client = mySqlPool.borrowObject() 266 | 267 | // dateProvinces可以实现一次去重 268 | // AdProvinceTop3:date province adid clickCount,由于每条数据由date province adid组成 269 | // 当只取date province时,一定会有重复的情况 270 | val dateProvinces = ArrayBuffer[String]() 271 | 272 | for (adProvinceTop3 <- adProvinceTop3s) { 273 | // 组合新key 274 | val key = adProvinceTop3.date + "_" + adProvinceTop3.province 275 | 276 | // dateProvinces中不包含当前key才添加 277 | // 借此去重 278 | if (!dateProvinces.contains(key)) { 279 | dateProvinces += key 280 | } 281 | } 282 | 283 | // 根据去重后的date和province,进行批量删除操作 284 | // 先将原来的数据全部删除 285 | val deleteSQL = "DELETE FROM ad_province_top3 WHERE date=? AND province=?" 
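// Unlike AdStatDAO, this DAO does not split the input into inserts and updates: the top-3
// result for a (date, province) pair is recomputed as a whole on every batch, so the rows
// already stored for those keys are deleted first and the fresh top-3 rows are re-inserted
// below. That keeps only the latest ranking per (date, province) without needing an
// update path.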
286 | 287 | val deleteParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 288 | 289 | for (dateProvince <- dateProvinces) { 290 | 291 | val dateProvinceSplited = dateProvince.split("_") 292 | val date = dateProvinceSplited(0) 293 | val province = dateProvinceSplited(1) 294 | 295 | val params = Array[Any](date, province) 296 | deleteParamsList += params 297 | } 298 | 299 | client.executeBatch(deleteSQL, deleteParamsList.toArray) 300 | 301 | // 批量插入传入进来的所有数据 302 | val insertSQL = "INSERT INTO ad_province_top3 VALUES(?,?,?,?)" 303 | 304 | val insertParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 305 | 306 | // 将传入的数据转化为参数列表 307 | for (adProvinceTop3 <- adProvinceTop3s) { 308 | insertParamsList += Array[Any](adProvinceTop3.date, adProvinceTop3.province, adProvinceTop3.adid, adProvinceTop3.clickCount) 309 | } 310 | 311 | client.executeBatch(insertSQL, insertParamsList.toArray) 312 | 313 | // 使用完成后将对象返回给对象池 314 | mySqlPool.returnObject(client) 315 | } 316 | 317 | } 318 | 319 | 320 | /** 321 | * 广告点击趋势DAO实现类 322 | * 323 | * @author Administrator 324 | * 325 | */ 326 | object AdClickTrendDAO { 327 | 328 | def updateBatch(adClickTrends: Array[AdClickTrend]) { 329 | // 获取对象池单例对象 330 | val mySqlPool = CreateMySqlPool() 331 | // 从对象池中提取对象 332 | val client = mySqlPool.borrowObject() 333 | 334 | // 区分开来哪些是要插入的,哪些是要更新的 335 | val updateAdClickTrends = ArrayBuffer[AdClickTrend]() 336 | val insertAdClickTrends = ArrayBuffer[AdClickTrend]() 337 | 338 | val selectSQL = "SELECT count(*) " + 339 | "FROM ad_click_trend " + 340 | "WHERE date=? " + 341 | "AND hour=? " + 342 | "AND minute=? " + 343 | "AND adid=?" 344 | 345 | for (adClickTrend <- adClickTrends) { 346 | // 通过查询结果判断当前项时待插入还是待更新 347 | val params = Array[Any](adClickTrend.date, adClickTrend.hour, adClickTrend.minute, adClickTrend.adid) 348 | client.executeQuery(selectSQL, params, new QueryCallback { 349 | override def process(rs: ResultSet): Unit = { 350 | if (rs.next() && rs.getInt(1) > 0) { 351 | updateAdClickTrends += adClickTrend 352 | } else { 353 | insertAdClickTrends += adClickTrend 354 | } 355 | } 356 | }) 357 | 358 | } 359 | 360 | // 执行批量更新操作 361 | // 此处的UPDATE是覆盖 362 | val updateSQL = "UPDATE ad_click_trend SET clickCount=? " + 363 | "WHERE date=? " + 364 | "AND hour=? " + 365 | "AND minute=? " + 366 | "AND adid=?" 
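// Note: this UPDATE overwrites clickCount (SET clickCount=?), the same as ad_stat,
// whereas ad_user_click_count accumulates (SET clickCount=clickCount + ?). The value
// arriving here for a (date, hour, minute, adid) key is therefore expected to already be
// the full count for that minute rather than an increment.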
367 | 368 | val updateParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 369 | 370 | for (adClickTrend <- updateAdClickTrends) { 371 | updateParamsList += Array[Any](adClickTrend.clickCount, adClickTrend.date, adClickTrend.hour, adClickTrend.minute, adClickTrend.adid) 372 | } 373 | 374 | client.executeBatch(updateSQL, updateParamsList.toArray) 375 | 376 | // 执行批量更新操作 377 | val insertSQL = "INSERT INTO ad_click_trend VALUES(?,?,?,?,?)" 378 | 379 | val insertParamsList: ArrayBuffer[Array[Any]] = ArrayBuffer[Array[Any]]() 380 | 381 | for (adClickTrend <- insertAdClickTrends) { 382 | insertParamsList += Array[Any](adClickTrend.date, adClickTrend.hour, adClickTrend.minute, adClickTrend.adid, adClickTrend.clickCount) 383 | } 384 | 385 | client.executeBatch(insertSQL, insertParamsList.toArray) 386 | 387 | // 使用完成后将对象返回给对象池 388 | mySqlPool.returnObject(client) 389 | } 390 | 391 | } 392 | 393 | -------------------------------------------------------------------------------- /commons/target/classes/test/PageConvertStat.scala: -------------------------------------------------------------------------------- 1 | import java.util.UUID 2 | 3 | import commons.conf.ConfigurationManager 4 | import commons.constant.Constants 5 | import commons.model.UserVisitAction 6 | import commons.utils.{DateUtils, ParamUtils} 7 | import net.sf.json.JSONObject 8 | import org.apache.spark.SparkConf 9 | import org.apache.spark.sql.{SaveMode, SparkSession} 10 | 11 | import scala.collection.mutable 12 | 13 | object PageConvertStat { 14 | 15 | def main(args: Array[String]): Unit = { 16 | 17 | // 获取任务限制条件 18 | val jsonStr = ConfigurationManager.config.getString(Constants.TASK_PARAMS) 19 | val taskParam = JSONObject.fromObject(jsonStr) 20 | 21 | // 获取唯一主键 22 | val taskUUID = UUID.randomUUID().toString 23 | 24 | // 创建sparkConf 25 | val sparkConf = new SparkConf().setAppName("pageConvert").setMaster("local[*]") 26 | 27 | // 创建sparkSession 28 | val sparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate() 29 | 30 | // 获取用户行为数据 31 | val sessionId2ActionRDD = getUserVisitAction(sparkSession, taskParam) 32 | 33 | // pageFlowStr: "1,2,3,4,5,6,7" 34 | val pageFlowStr = ParamUtils.getParam(taskParam, Constants.PARAM_TARGET_PAGE_FLOW) 35 | // pageFlowArray: Array[Long] [1,2,3,4,5,6,7] 36 | val pageFlowArray = pageFlowStr.split(",") 37 | // pageFlowArray.slice(0, pageFlowArray.length - 1): [1,2,3,4,5,6] 38 | // pageFlowArray.tail: [2,3,4,5,6,7] 39 | // pageFlowArray.slice(0, pageFlowArray.length - 1).zip(pageFlowArray.tail): [(1,2), (2,3) , ..] 40 | // targetPageSplit: [1_2, 2_3, 3_4, ...] 41 | val targetPageSplit = pageFlowArray.slice(0, pageFlowArray.length - 1).zip(pageFlowArray.tail).map{ 42 | case (page1, page2) => page1 + "_" + page2 43 | } 44 | 45 | // sessionId2ActionRDD: RDD[(sessionId, action)] 46 | val sessionId2GroupRDD = sessionId2ActionRDD.groupByKey() 47 | 48 | // pageSpllitNumRDD: RDD[(String, 1L)] 49 | val pageSpllitNumRDD = sessionId2GroupRDD.flatMap{ 50 | case (sessionId, iterableAction) => 51 | // item1: action 52 | // item2: action 53 | // sortList: List[UserVisitAction] 54 | val sortList = iterableAction.toList.sortWith((item1, item2) =>{ 55 | DateUtils.parseTime(item1.action_time).getTime < DateUtils.parseTime(item2.action_time).getTime 56 | }) 57 | 58 | // pageList: List[Long] [1,2,3,4,...] 
59 | val pageList = sortList.map{ 60 | case action => action.page_id 61 | } 62 | 63 | // pageList.slice(0, pageList.length - 1): [1,2,3,..,N-1] 64 | // pageList.tail: [2,3,4,..,N] 65 | // pageList.slice(0, pageList.length - 1).zip(pageList.tail): [(1,2), (2,3), ...] 66 | // pageSplit: [1_2, 2_3, ...] 67 | val pageSplit = pageList.slice(0, pageList.length - 1).zip(pageList.tail).map{ 68 | case (page1, page2) => page1 + "_" + page2 69 | } 70 | 71 | val pageSplitFilter = pageSplit.filter{ 72 | case pageSplit => targetPageSplit.contains(pageSplit) 73 | } 74 | 75 | pageSplitFilter.map{ 76 | case pageSplit => (pageSplit, 1L) 77 | } 78 | } 79 | 80 | // pageSplitCountMap: Map[(pageSplit, count)] 81 | val pageSplitCountMap = pageSpllitNumRDD.countByKey() 82 | 83 | val startPage = pageFlowArray(0).toLong 84 | 85 | val startPageCount = sessionId2ActionRDD.filter{ 86 | case (sessionId, action) => action.page_id == startPage 87 | }.count() 88 | 89 | getPageConvert(sparkSession, taskUUID, targetPageSplit, startPageCount, pageSplitCountMap) 90 | 91 | } 92 | 93 | def getPageConvert(sparkSession: SparkSession, 94 | taskUUID: String, 95 | targetPageSplit: Array[String], 96 | startPageCount: Long, 97 | ageSplitCountMap: collection.Map[String, Long]): Unit = { 98 | 99 | val pageSplitRatio = new mutable.HashMap[String, Double]() 100 | 101 | var lastPageCount = startPageCount.toDouble 102 | 103 | // 1,2,3,4,5,6,7 104 | // 1_2,2_3,... 105 | for(pageSplit <- targetPageSplit){ 106 | // 第一次循环: lastPageCount: page1 currentPageSplitCount: page1_page2 结果:page1_page2 107 | val currentPageSplitCount = ageSplitCountMap.get(pageSplit).get.toDouble 108 | val ratio = currentPageSplitCount / lastPageCount 109 | pageSplitRatio.put(pageSplit, ratio) 110 | lastPageCount = currentPageSplitCount 111 | } 112 | 113 | val convertStr = pageSplitRatio.map{ 114 | case (pageSplit, ratio) => pageSplit + "=" + ratio 115 | }.mkString("|") 116 | 117 | val pageSplit = PageSplitConvertRate(taskUUID, convertStr) 118 | 119 | val pageSplitRatioRDD = sparkSession.sparkContext.makeRDD(Array(pageSplit)) 120 | 121 | import sparkSession.implicits._ 122 | pageSplitRatioRDD.toDF().write 123 | .format("jdbc") 124 | .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL)) 125 | .option("dbtable", "page_split_convert_rate_0308") 126 | .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER)) 127 | .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD)) 128 | .mode(SaveMode.Append) 129 | .save() 130 | 131 | } 132 | 133 | 134 | def getUserVisitAction(sparkSession: SparkSession, taskParam: JSONObject) = { 135 | val startDate = ParamUtils.getParam(taskParam, Constants.PARAM_START_DATE) 136 | val endDate = ParamUtils.getParam(taskParam, Constants.PARAM_END_DATE) 137 | 138 | val sql = "select * from user_visit_action where date>='" + startDate + "' and date<='" + 139 | endDate + "'" 140 | 141 | import sparkSession.implicits._ 142 | sparkSession.sql(sql).as[UserVisitAction].rdd.map(item => (item.session_id, item)) 143 | } 144 | 145 | } 146 | -------------------------------------------------------------------------------- /commons/target/classes/test/PageStat.scala: -------------------------------------------------------------------------------- 1 | import java.util.UUID 2 | 3 | import commons.conf.ConfigurationManager 4 | import commons.constant.Constants 5 | import commons.model.UserVisitAction 6 | import commons.utils.{DateUtils, NumberUtils, ParamUtils, StringUtils} 7 | import 
net.sf.json.JSONObject 8 | import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.BlockTargetPair 9 | import org.apache.spark.SparkConf 10 | import org.apache.spark.rdd.RDD 11 | import org.apache.spark.sql.{SaveMode, SparkSession} 12 | 13 | import scala.collection.mutable 14 | 15 | object PageStat { 16 | 17 | def main(args: Array[String]): Unit = { 18 | 19 | // 获取任务限制条件 20 | val jsonStr = ConfigurationManager.config.getString(Constants.TASK_PARAMS) 21 | val taskParam = JSONObject.fromObject(jsonStr) 22 | 23 | // 获取唯一主键 24 | val taskUUID = UUID.randomUUID.toString 25 | 26 | // 创建sparkConf 27 | val sparkConf = new SparkConf().setAppName("pageStat").setMaster("local[*]") 28 | 29 | // 创建sparkSession 30 | val sparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate() 31 | 32 | val sessionId2ActionRDD = getActionRDD(sparkSession, taskParam) 33 | 34 | /* 获取目标访问页面切片 */ 35 | 36 | // 1,2,3,4,5,6,7 37 | val pageInfo = ParamUtils.getParam(taskParam, Constants.PARAM_TARGET_PAGE_FLOW) 38 | // [1,2,3,4,5,6,7] 39 | val pageArray = pageInfo.split(",") 40 | // pageArray.slice(0, pageArray.length - 1): [1,2,3,4,5,6] 41 | // pageArray.tail:[2,3,4,5,6,7] 42 | // zip: (1,2),(2,3)..... 43 | val targetPageFlow = pageArray.slice(0, pageArray.length - 1).zip(pageArray.tail).map{ 44 | case (item1, item2) => item1 + "_" + item2 45 | } 46 | 47 | /* 获取每一个session的页面访问流 */ 48 | 49 | // 得到一个session所有的行为数据 50 | val sessionId2GroupRDD = sessionId2ActionRDD.groupByKey() 51 | 52 | // 获取每一个session的页面访问流 53 | // 1. 按照action_time对session所有的行为数据进行排序 54 | // 2. 通过map操作得到action数据里面的page_id 55 | // 3. 得到按时间排列的page_id之后,先转化为页面切片形式 56 | // 4. 过滤,将不存在于目标统计页面切片的数据过滤掉 57 | // 5. 转化格式为(page1_page2, 1L) 58 | val pageId2NumRDD = getPageSplit(sparkSession, targetPageFlow, sessionId2GroupRDD) 59 | 60 | // 聚合操作 61 | // (page1_page2, count) 62 | val pageSplitCountMap = pageId2NumRDD.countByKey() 63 | 64 | val startPage = pageArray(0) 65 | 66 | val startPageCount = sessionId2ActionRDD.filter{ 67 | case (sessionId, userVisitAction) => 68 | userVisitAction.page_id == startPage.toLong 69 | }.count() 70 | 71 | // 得到最后的统计结果 72 | getPageConvertRate(sparkSession, taskUUID, targetPageFlow, startPageCount, pageSplitCountMap) 73 | } 74 | 75 | def getPageConvertRate(sparkSession: SparkSession, 76 | taskUUID: String, 77 | targetPageFlow:Array[String], 78 | startPageCount: Long, 79 | pageSplitCountMap: collection.Map[String, Long]): Unit = { 80 | 81 | val pageSplitConvertMap = new mutable.HashMap[String, Double]() 82 | 83 | var lastPageCount = startPageCount.toDouble 84 | 85 | for(page <- targetPageFlow){ 86 | val currentPageCount = pageSplitCountMap.get(page).get.toDouble 87 | val rate = NumberUtils.formatDouble(currentPageCount / lastPageCount, 2) 88 | pageSplitConvertMap.put(page, rate) 89 | lastPageCount = currentPageCount 90 | } 91 | 92 | val convertStr = pageSplitConvertMap.map{ 93 | case (k,v) => k + "=" + v 94 | }.mkString("|") 95 | 96 | val pageConvert = PageSplitConvertRate(taskUUID, convertStr) 97 | 98 | val pageConvertRDD = sparkSession.sparkContext.makeRDD(Array(pageConvert)) 99 | 100 | import sparkSession.implicits._ 101 | pageConvertRDD.toDF().write 102 | .format("jdbc") 103 | .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL)) 104 | .option("dbtable", "page_split_convert_rate1108") 105 | .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER)) 106 | .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD)) 107 | 
.mode(SaveMode.Append) 108 | .save() 109 | } 110 | 111 | def getPageSplit(sparkSession: SparkSession, 112 | targetPageFlow: Array[String], 113 | sessionId2GroupRDD: RDD[(String, Iterable[UserVisitAction])]) = { 114 | sessionId2GroupRDD.flatMap{ 115 | case (sessionId, iterableAction) => 116 | // 首先按照时间进行排序 117 | val sortedAction = iterableAction.toList.sortWith((action1, action2) => { 118 | DateUtils.parseTime(action1.action_time).getTime < 119 | DateUtils.parseTime(action2.action_time).getTime 120 | }) 121 | 122 | val pageInfo = sortedAction.map(item => item.page_id) 123 | 124 | val pageFlow = pageInfo.slice(0, pageInfo.length - 1).zip(pageInfo.tail).map{ 125 | case (page1, page2) => page1 + "_" + page2 126 | } 127 | 128 | val pageSplitFiltered = pageFlow.filter(item => targetPageFlow.contains(item)).map(item => (item, 1L)) 129 | 130 | pageSplitFiltered 131 | } 132 | 133 | 134 | } 135 | 136 | 137 | def getActionRDD(sparkSession: SparkSession, taskParam: JSONObject) = { 138 | val startDate = ParamUtils.getParam(taskParam, Constants.PARAM_START_DATE) 139 | val endDate = ParamUtils.getParam(taskParam, Constants.PARAM_END_DATE) 140 | 141 | val sql = "select * from user_visit_action where date>='" + startDate + "' and date<='" + endDate + "'" 142 | 143 | import sparkSession.implicits._ 144 | sparkSession.sql(sql).as[UserVisitAction].rdd.map(item => (item.session_id, item)) 145 | } 146 | 147 | } 148 | -------------------------------------------------------------------------------- /commons/target/classes/test/ad.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat Premium Data Transfer 3 | 4 | Source Server : localhost 5 | Source Server Type : MySQL 6 | Source Server Version : 50720 7 | Source Host : localhost 8 | Source Database : commerce 9 | 10 | Target Server Type : MySQL 11 | Target Server Version : 50720 12 | File Encoding : utf-8 13 | 14 | Date: 11/03/2017 11:23:32 AM 15 | */ 16 | 17 | SET FOREIGN_KEY_CHECKS = 0; 18 | 19 | -- ---------------------------- 20 | -- Table structure for `ad_blacklist` 21 | -- ---------------------------- 22 | DROP TABLE IF EXISTS `ad_blacklist`; 23 | CREATE TABLE `ad_blacklist` ( 24 | `userid` int(11) DEFAULT NULL 25 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 26 | 27 | -- ---------------------------- 28 | -- Table structure for `ad_click_trend` 29 | -- ---------------------------- 30 | DROP TABLE IF EXISTS `ad_click_trend`; 31 | CREATE TABLE `ad_click_trend` ( 32 | `date` varchar(30) DEFAULT NULL, 33 | `hour` varchar(30) DEFAULT NULL, 34 | `minute` varchar(30) DEFAULT NULL, 35 | `adid` int(11) DEFAULT NULL, 36 | `clickCount` int(11) DEFAULT NULL 37 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 38 | 39 | -- ---------------------------- 40 | -- Table structure for `ad_province_top3` 41 | -- ---------------------------- 42 | DROP TABLE IF EXISTS `ad_province_top3`; 43 | CREATE TABLE `ad_province_top3` ( 44 | `date` varchar(30) DEFAULT NULL, 45 | `province` varchar(100) DEFAULT NULL, 46 | `adid` int(11) DEFAULT NULL, 47 | `clickCount` int(11) DEFAULT NULL 48 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 49 | 50 | -- ---------------------------- 51 | -- Table structure for `ad_stat` 52 | -- ---------------------------- 53 | DROP TABLE IF EXISTS `ad_stat`; 54 | CREATE TABLE `ad_stat` ( 55 | `date` varchar(30) DEFAULT NULL, 56 | `province` varchar(100) DEFAULT NULL, 57 | `city` varchar(100) DEFAULT NULL, 58 | `adid` int(11) DEFAULT NULL, 59 | `clickCount` int(11) DEFAULT NULL 60 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 61 | 62 | -- 
---------------------------- 63 | -- Table structure for `ad_user_click_count` 64 | -- ---------------------------- 65 | DROP TABLE IF EXISTS `ad_user_click_count`; 66 | CREATE TABLE `ad_user_click_count` ( 67 | `date` varchar(30) DEFAULT NULL, 68 | `userid` int(11) DEFAULT NULL, 69 | `adid` int(11) DEFAULT NULL, 70 | `clickCount` int(11) DEFAULT NULL 71 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 72 | 73 | -------------------------------------------------------------------------------- /mock/mock.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mock/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | shopAnalyze 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mock 13 | 14 | 15 | org.apache.hadoop 16 | hadoop-client 17 | 2.8.5 18 | 19 | 20 | org.apache.hadoop 21 | hadoop-common 22 | 2.8.5 23 | 24 | 25 | org.apache.hadoop 26 | hadoop-hdfs 27 | 2.8.5 28 | 29 | 30 | org.apache.hadoop 31 | hadoop-yarn-common 32 | 2.8.5 33 | 34 | 35 | org.codehaus.janino 36 | janino 37 | 3.0.8 38 | 39 | 40 | org.apache.spark 41 | spark-sql_2.12 42 | 2.4.5 43 | 44 | 45 | mysql 46 | mysql-connector-java 47 | 8.0.20 48 | 49 | 50 | org.example 51 | commons 52 | ${project.version} 53 | 54 | 55 | 56 | org.apache.spark 57 | spark-core_2.12 58 | 2.4.5 59 | 60 | 61 | 62 | org.apache.spark 63 | spark-hive_2.12 64 | 2.4.5 65 | 66 | 67 | org.apache.spark 68 | spark-streaming_2.12 69 | 2.4.5 70 | 71 | 72 | org.apache.spark 73 | spark-streaming-kafka-0-10_2.12 74 | 2.4.3 75 | 76 | 77 | slf4j-log4j12 78 | org.slf4j 79 | 80 | 81 | 82 | 83 | org.apache.spark 84 | spark-sql_2.12 85 | 2.4.5 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /mock/src/main/java/scala/MockDataGenerate.scala: -------------------------------------------------------------------------------- 1 | package scala 2 | 3 | /* 4 | * Copyright (c) 2018. Atguigu Inc. All Rights Reserved. 
5 | */ 6 | 7 | import java.util.UUID 8 | 9 | import commons.model.{ProductInfo, UserInfo, UserVisitAction} 10 | import commons.utils.{DateUtils, StringUtil} 11 | import org.apache.spark.SparkConf 12 | import org.apache.spark.sql.{DataFrame, SparkSession} 13 | 14 | import scala.collection.mutable.ArrayBuffer 15 | import scala.util.Random 16 | 17 | 18 | /** 19 | * 模拟的数据 20 | * date:是当前日期 21 | * age: 0 - 59 22 | * professionals: professional[0 - 59] 23 | * cities: 0 - 9 24 | * sex: 0 - 1 25 | * keywords: ("火锅", "蛋糕", "重庆辣子鸡", "重庆小面", "呷哺呷哺", "新辣道鱼火锅", "国贸大厦", "太古商场", "日本料理", "温泉") 26 | * categoryIds: 0 - 99 27 | * ProductId: 0 - 99 28 | */ 29 | object MockDataGenerate { 30 | 31 | /** 32 | * 模拟用户行为信息 33 | * 34 | * @return 35 | */ 36 | private def mockUserVisitActionData(): Array[UserVisitAction] = { 37 | 38 | val searchKeywords = Array("华为手机", "联想笔记本", "小龙虾", "卫生纸", "吸尘器", "Lamer", "机器学习", "苹果", "洗面奶", "保温杯") 39 | // yyyy-MM-dd 40 | val date = DateUtils.getTodayDate() 41 | // 关注四个行为:搜索、点击、下单、支付 42 | val actions = Array("search", "click", "order", "pay") 43 | val random = new Random() 44 | val rows = ArrayBuffer[UserVisitAction]() 45 | 46 | // 一共100个用户(有重复) 47 | for (i <- 0 to 100) { 48 | val userid = random.nextInt(100) 49 | // 每个用户产生10个session 50 | for (j <- 0 to 10) { 51 | // 不可变的,全局的,独一无二的128bit长度的标识符,用于标识一个session,体现一次会话产生的sessionId是独一无二的 52 | val sessionid = UUID.randomUUID().toString().replace("-", "") 53 | // 在yyyy-MM-dd后面添加一个随机的小时时间(0-23) 54 | val baseActionTime = date + " " + random.nextInt(23) 55 | // 每个(userid + sessionid)生成0-100条用户访问数据 56 | for (k <- 0 to random.nextInt(100)) { 57 | val pageid = random.nextInt(10) 58 | // 在yyyy-MM-dd HH后面添加一个随机的分钟时间和秒时间 59 | val actionTime = baseActionTime + ":" + StringUtil.fulfuill(String.valueOf(random.nextInt(59))) + ":" + StringUtil.fulfuill(String.valueOf(random.nextInt(59))) 60 | var searchKeyword: String = null 61 | var clickCategoryId: Long = -1L 62 | var clickProductId: Long = -1L 63 | var orderCategoryIds: String = null 64 | var orderProductIds: String = null 65 | var payCategoryIds: String = null 66 | var payProductIds: String = null 67 | val cityid = random.nextInt(10).toLong 68 | // 随机确定用户在当前session中的行为 69 | val action = actions(random.nextInt(4)) 70 | 71 | // 根据随机产生的用户行为action决定对应字段的值 72 | action match { 73 | case "search" => searchKeyword = searchKeywords(random.nextInt(10)) 74 | case "click" => clickCategoryId = random.nextInt(100).toLong 75 | clickProductId = String.valueOf(random.nextInt(100)).toLong 76 | case "order" => orderCategoryIds = random.nextInt(100).toString 77 | orderProductIds = random.nextInt(100).toString 78 | case "pay" => payCategoryIds = random.nextInt(100).toString 79 | payProductIds = random.nextInt(100).toString 80 | } 81 | 82 | rows += UserVisitAction(date, userid, sessionid, 83 | pageid, actionTime, searchKeyword, 84 | clickCategoryId, clickProductId, 85 | orderCategoryIds, orderProductIds, 86 | payCategoryIds, payProductIds, cityid) 87 | } 88 | } 89 | } 90 | rows.toArray 91 | } 92 | 93 | /** 94 | * 模拟用户信息表 95 | * 96 | * @return 97 | */ 98 | private def mockUserInfo(): Array[UserInfo] = { 99 | 100 | val rows = ArrayBuffer[UserInfo]() 101 | val sexes = Array("male", "female") 102 | val random = new Random() 103 | 104 | // 随机产生100个用户的个人信息 105 | for (i <- 0 to 100) { 106 | val userid = i 107 | val username = "user" + i 108 | val name = "name" + i 109 | val age = random.nextInt(60) 110 | val professional = "professional" + random.nextInt(100) 111 | val city = "city" + random.nextInt(100) 112 | val sex = 
sexes(random.nextInt(2)) 113 | rows += UserInfo(userid, username, name, age, 114 | professional, city, sex) 115 | } 116 | rows.toArray 117 | } 118 | 119 | /** 120 | * 模拟产品数据表 121 | * 122 | * @return 123 | */ 124 | private def mockProductInfo(): Array[ProductInfo] = { 125 | 126 | val rows = ArrayBuffer[ProductInfo]() 127 | val random = new Random() 128 | val productStatus = Array(0, 1) 129 | 130 | // 随机产生100个产品信息 131 | for (i <- 0 to 100) { 132 | val productId = i 133 | val productName = "product" + i 134 | val extendInfo = "{\"product_status\": " + productStatus(random.nextInt(2)) + "}" 135 | 136 | rows += ProductInfo(productId, productName, extendInfo) 137 | } 138 | 139 | rows.toArray 140 | } 141 | 142 | /** 143 | * 将DataFrame插入到Hive表中 144 | * 145 | * @param spark SparkSQL客户端 146 | * @param tableName 表名 147 | * @param dataDF DataFrame 148 | */ 149 | private def insertHive(spark: SparkSession, tableName: String, dataDF: DataFrame): Unit = { 150 | // spark.sql("DROP TABLE IF EXISTS " + tableName) 151 | dataDF.write.saveAsTable(tableName) 152 | //dataDF.write.parquet("hdfs://hadoop1:9000/shopAnalyze") 153 | } 154 | 155 | val USER_VISIT_ACTION_TABLE = "user_visit_action" 156 | val USER_INFO_TABLE = "user_info" 157 | val PRODUCT_INFO_TABLE = "product_info" 158 | 159 | /** 160 | * 主入口方法 161 | * 162 | * @param args 启动参数 163 | */ 164 | def main(args: Array[String]): Unit = { 165 | 166 | // 创建Spark配置 167 | val sparkConf = new SparkConf().setAppName("MockData").setMaster("local[*]"); 168 | 169 | 170 | // 创建Spark SQL 客户端 171 | val spark = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate() 172 | 173 | // 模拟数据 174 | val userVisitActionData = this.mockUserVisitActionData() 175 | val userInfoData = this.mockUserInfo() 176 | val productInfoData = this.mockProductInfo() 177 | 178 | // 将模拟数据装换为RDD 179 | val userVisitActionRdd = spark.sparkContext.makeRDD(userVisitActionData) 180 | val userInfoRdd = spark.sparkContext.makeRDD(userInfoData) 181 | val productInfoRdd = spark.sparkContext.makeRDD(productInfoData) 182 | 183 | // 加载SparkSQL的隐式转换支持 184 | import spark.implicits._ 185 | 186 | // 将用户访问数据装换为DF保存到Hive表中 187 | val userVisitActionDF = userVisitActionRdd.toDF() 188 | userVisitActionDF.show(); 189 | insertHive(spark, USER_VISIT_ACTION_TABLE, userVisitActionDF) 190 | 191 | val userInfoDF = userInfoRdd.toDF() 192 | //userInfoDF.show(); 193 | insertHive(spark, USER_INFO_TABLE, userInfoDF) 194 | 195 | // 将产品信息数据转换为DF保存到Hive表中 196 | val productInfoDF = productInfoRdd.toDF() 197 | insertHive(spark,PRODUCT_INFO_TABLE,productInfoDF); 198 | 199 | spark.close 200 | } 201 | 202 | } 203 | -------------------------------------------------------------------------------- /mock/src/main/java/scala/MockRealTimeData.scala: -------------------------------------------------------------------------------- 1 | package scala 2 | 3 | /* 4 | * Copyright (c) 2018. Atguigu Inc. All Rights Reserved. 
5 | */ 6 | 7 | import java.util.Properties 8 | 9 | import commons.conf.ConfigurationManager 10 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} 11 | 12 | import scala.collection.mutable.ArrayBuffer 13 | import scala.util.Random 14 | 15 | object MockRealTimeData { 16 | 17 | /** 18 | * 模拟的数据 19 | * 时间点: 当前时间毫秒 20 | * userId: 0 - 99 21 | * 省份、城市 ID相同 : 1 - 9 22 | * adid: 0 - 19 23 | * ((0L,"北京","北京"),(1L,"上海","上海"),(2L,"南京","江苏省"),(3L,"广州","广东省"),(4L,"三亚","海南省"),(5L,"武汉","湖北省"),(6L,"长沙","湖南省"),(7L,"西安","陕西省"),(8L,"成都","四川省"),(9L,"哈尔滨","东北省")) 24 | * 格式 :timestamp province city userid adid 25 | * 某个时间点 某个省份 某个城市 某个用户 某个广告 26 | */ 27 | def generateMockData(): Array[String] = { 28 | val array = ArrayBuffer[String]() 29 | val random = new Random() 30 | // 模拟实时数据: 31 | // timestamp province city userid adid 32 | for (i <- 0 to 50) { 33 | 34 | val timestamp = System.currentTimeMillis() 35 | val province = random.nextInt(3) 36 | val city = province 37 | val adid = random.nextInt(3) 38 | val userid = random.nextInt(3) 39 | 40 | // 拼接实时数据 41 | array += timestamp + " " + province + " " + city + " " + userid + " " + adid 42 | } 43 | array.toArray 44 | } 45 | 46 | def createKafkaProducer(broker: String): KafkaProducer[String, String] = { 47 | 48 | // 创建配置对象 49 | val prop = new Properties() 50 | // 添加配置 51 | prop.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, broker) 52 | prop.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") 53 | prop.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") 54 | 55 | // 根据配置创建Kafka生产者 56 | new KafkaProducer[String, String](prop) 57 | } 58 | 59 | 60 | def main(args: Array[String]): Unit = { 61 | 62 | // 获取配置文件commerce.properties中的Kafka配置参数 63 | val broker = ConfigurationManager.config.getString("kafka.broker.list") 64 | val topic = ConfigurationManager.config.getString("kafka.topics") 65 | 66 | // 创建Kafka消费者 67 | val kafkaProducer = createKafkaProducer(broker) 68 | 69 | while (true) { 70 | // 随机产生实时数据并通过Kafka生产者发送到Kafka集群中 71 | for (item <- generateMockData()) { 72 | kafkaProducer.send(new ProducerRecord[String, String](topic, item)) 73 | } 74 | println("success"); 75 | Thread.sleep(3000) 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /mock/target/classes/scala/MockDataGenerate$$typecreator13$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/mock/target/classes/scala/MockDataGenerate$$typecreator13$1.class -------------------------------------------------------------------------------- /mock/target/classes/scala/MockDataGenerate$$typecreator21$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/mock/target/classes/scala/MockDataGenerate$$typecreator21$1.class -------------------------------------------------------------------------------- /mock/target/classes/scala/MockDataGenerate$$typecreator5$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/mock/target/classes/scala/MockDataGenerate$$typecreator5$1.class 
-------------------------------------------------------------------------------- /mock/target/classes/scala/MockDataGenerate$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/mock/target/classes/scala/MockDataGenerate$.class -------------------------------------------------------------------------------- /mock/target/classes/scala/MockDataGenerate.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/mock/target/classes/scala/MockDataGenerate.class -------------------------------------------------------------------------------- /mock/target/classes/scala/MockRealTimeData$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/mock/target/classes/scala/MockRealTimeData$.class -------------------------------------------------------------------------------- /mock/target/classes/scala/MockRealTimeData.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/mock/target/classes/scala/MockRealTimeData.class -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.example 8 | shopAnalyze 9 | pom 10 | 1.0-SNAPSHOT 11 | 12 | commons 13 | session 14 | mock 15 | adverStat 16 | 17 | 18 | Maven 19 | 20 | http://maven.apache.org/ 21 | 2001 22 | 23 | 24 | 25 | website 26 | scp://webhost.company.com/www/website 27 | 28 | 29 | 30 | 31 | UTF-8 32 | 12 33 | 12 34 | 4.12 35 | 1.18.10 36 | 1.2.17 37 | 8.0.18 38 | 1.1.16 39 | 2.1.1 40 | 41 | 42 | 43 | 44 | 45 | 46 | com.alibaba.cloud 47 | spring-cloud-alibaba-dependencies 48 | 2.1.0.RELEASE 49 | pom 50 | import 51 | 52 | 53 | 54 | org.apache.maven.plugins 55 | maven-project-info-reports-plugin 56 | 3.0.0 57 | 58 | 59 | 60 | org.springframework.boot 61 | spring-boot-dependencies 62 | 2.2.2.RELEASE 63 | pom 64 | import 65 | 66 | 67 | 68 | org.springframework.cloud 69 | spring-cloud-dependencies 70 | Hoxton.SR1 71 | pom 72 | import 73 | 74 | 75 | com.alibaba.cloud 76 | spring-cloud-alibaba-dependencies 77 | 2.1.0.RELEASE 78 | pom 79 | import 80 | 81 | 82 | 83 | mysql 84 | mysql-connector-java 85 | ${mysql.version} 86 | runtime 87 | 88 | 89 | 90 | com.alibaba 91 | druid 92 | ${druid.version} 93 | 94 | 95 | org.mybatis.spring.boot 96 | mybatis-spring-boot-starter 97 | ${mybatis.spring.boot.version} 98 | 99 | 100 | 101 | junit 102 | junit 103 | ${junit.version} 104 | 105 | 106 | 107 | log4j 108 | log4j 109 | ${log4j.version} 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | org.springframework.boot 119 | spring-boot-maven-plugin 120 | 121 | true 122 | true 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/readme.md -------------------------------------------------------------------------------- /session/pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | shopAnalyze 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | session 13 | 14 | 15 | 16 | org.apache.hadoop 17 | hadoop-client 18 | 2.8.5 19 | 20 | 21 | com.fasterxml.jackson.core 22 | jackson-core 23 | 2.10.0 24 | 25 | 26 | com.fasterxml.jackson.core 27 | jackson-annotations 28 | 2.10.0 29 | 30 | 31 | 32 | com.fasterxml.jackson.core 33 | jackson-databind 34 | 2.10.0 35 | 36 | 37 | 38 | 39 | com.alibaba 40 | fastjson 41 | 1.2.36 42 | 43 | 44 | 45 | org.apache.hadoop 46 | hadoop-common 47 | 2.8.5 48 | 49 | 50 | org.apache.hadoop 51 | hadoop-hdfs 52 | 2.8.5 53 | 54 | 55 | commons-beanutils 56 | commons-beanutils 57 | 1.9.3 58 | 59 | 60 | org.apache.hadoop 61 | hadoop-yarn-common 62 | 2.8.5 63 | 64 | 65 | org.codehaus.janino 66 | janino 67 | 3.0.8 68 | 69 | 70 | org.apache.spark 71 | spark-sql_2.12 72 | 2.4.5 73 | 74 | 75 | mysql 76 | mysql-connector-java 77 | 8.0.20 78 | 79 | 80 | org.example 81 | commons 82 | ${project.version} 83 | 84 | 85 | 86 | org.apache.spark 87 | spark-core_2.12 88 | 2.4.5 89 | 90 | 91 | 92 | org.apache.spark 93 | spark-hive_2.12 94 | 2.4.5 95 | 96 | 97 | org.apache.spark 98 | spark-streaming_2.12 99 | 2.4.5 100 | 101 | 102 | org.apache.spark 103 | spark-streaming-kafka-0-10_2.12 104 | 2.4.3 105 | 106 | 107 | slf4j-log4j12 108 | org.slf4j 109 | 110 | 111 | 112 | 113 | org.apache.spark 114 | spark-sql_2.12 115 | 2.4.5 116 | 117 | 118 | -------------------------------------------------------------------------------- /session/session.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /session/src/main/java/scala/sessionAccumulator.scala: -------------------------------------------------------------------------------- 1 | package scala 2 | 3 | import org.apache.spark.util.AccumulatorV2 4 | 5 | import scala.collection.mutable 6 | 7 | class sessionAccumulator extends AccumulatorV2[String,mutable.HashMap[String,Int]] { 8 | val countMap=new mutable.HashMap[String,Int](); 9 | override def isZero: Boolean = { 10 | countMap.isEmpty; 11 | } 12 | 13 | override def copy(): AccumulatorV2[String, mutable.HashMap[String, Int]] = { 14 | val acc=new sessionAccumulator; 15 | acc.countMap++=this.countMap; 16 | acc 17 | } 18 | 19 | override def reset(): Unit = { 20 | countMap.clear; 21 | } 22 | 23 | override def add(v: String): Unit = { 24 | if (!countMap.contains(v)){ 25 | countMap+=(v->0); 26 | } 27 | countMap.update(v,countMap(v)+1); 28 | } 29 | 30 | override def merge(other: AccumulatorV2[String, mutable.HashMap[String, Int]])={ 31 | other match{ 32 | case acc:sessionAccumulator=>acc.countMap.foldLeft(this.countMap){ 33 | case(map,(k,v))=>map+=(k->(map.getOrElse(k,0)+v)); 34 | } 35 | } 36 | } 37 | 38 | override def value: mutable.HashMap[String, Int] = { 39 | this.countMap; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /session/src/main/java/scala/sessionStat.scala: -------------------------------------------------------------------------------- 1 | package scala 2 | 3 | import java.util.UUID 4 | 5 | import com.alibaba.fastjson.{JSON, JSONObject} 6 | import commons.conf.ConfigurationManager 7 | import commons.constant.Constants 8 | import org.apache.spark.SparkConf 9 | import org.apache.spark.sql.SparkSession 10 | import server.{serverFive, serverFour, serverOne, serverThree, serverTwo} 11 | 12 | object 
sessionStat { 13 | 14 | 15 | def main(args: Array[String]): Unit = { 16 | //server 17 | val oneServer=new serverOne; 18 | val twoServer=new serverTwo; 19 | val threeServer=new serverThree; 20 | val fourServer=new serverFour; 21 | val fiveServer=new serverFive; 22 | //sparksession 23 | val conf=new SparkConf().setAppName("session").setMaster("local[*]"); 24 | val session=SparkSession.builder().config(conf).getOrCreate(); 25 | session.sparkContext.setLogLevel("ERROR"); 26 | //获取配置 27 | val str=ConfigurationManager.config.getString(Constants.TASK_PARAMS); 28 | val task:JSONObject=JSON.parseObject(str); 29 | //主键 30 | val taskUUID=UUID.randomUUID().toString; 31 | 32 | val filterInfo=getFilterFullResult(oneServer,session,task,taskUUID); 33 | 34 | //需求二 35 | //twoServer.GetextraSession(session,filterInfo,task,taskUUID); 36 | 37 | //需求三 38 | val actionRdd=oneServer.basicActions(session,task); 39 | val sessionId2ActionRDD = actionRdd.map{ 40 | item => (item.session_id, item) 41 | } 42 | val sessionId2FilterActionRDD=sessionId2ActionRDD.join(filterInfo).map { 43 | case (sessionId,(action,info))=>{ 44 | (sessionId,action); 45 | } 46 | } 47 | //val top10Category= threeServer.top10PopularCategories(session,taskUUID,sessionId2FilterActionRDD); 48 | //需求四 49 | 50 | //val top10SessionRDD=fourServer.top10ActiveSession(session,taskUUID,sessionId2FilterActionRDD,top10Category); 51 | 52 | //需求五 53 | fiveServer.getSkipRatio(session,sessionId2FilterActionRDD,taskUUID); 54 | 55 | } 56 | def getFilterFullResult(oneServer: serverOne, session: SparkSession, task: JSONObject,taskUUID:String) ={ 57 | //1.获取基本的action信息 58 | val basicActions=oneServer.basicActions(session,task); 59 | //2.根据session聚合信息 60 | val basicActionMap=basicActions.map(item=>{ 61 | val sessionId=item.session_id; 62 | (sessionId,item); 63 | }) 64 | val groupBasicActions=basicActionMap.groupByKey(); 65 | //3.根据每个用户的sessionId->actions,将actions统计成一条str信息 66 | val aggUserActions=oneServer.AggActionGroup(groupBasicActions); 67 | //4.读取hadoop文件,获取用户的基本信息 68 | val userInfo=oneServer.getUserInfo(session); 69 | //5.根据user_Id,将userInfo的信息插入到aggUserActions,形成更完整的信息 70 | val finalInfo=oneServer.AggInfoAndActions(aggUserActions,userInfo); 71 | finalInfo.cache(); 72 | //6.根据common模块里的限制条件过滤数据,跟新累加器 73 | val accumulator=new sessionAccumulator; 74 | session.sparkContext.register(accumulator); 75 | val FilterInfo=oneServer.filterInfo(finalInfo,task,accumulator); 76 | FilterInfo.count(); 77 | /* 78 | 目前为止,我们已经得到了所有符合条件的过滤总和信息,以及每个范围内的session数量(累加器), 79 | */ 80 | //7.计算每个范围内的session占比, 81 | val sessionRatioCount= oneServer.getSessionRatio(session,taskUUID,FilterInfo,accumulator.value); 82 | FilterInfo; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /session/src/main/java/server/SortKey.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | case class SortKey(clickCount:Long, orderCount:Long, payCount:Long) extends Ordered[SortKey]{ 4 | // this.compare(that) 5 | // this compare that 6 | // compare > 0 this > that 7 | // compare <0 this < that 8 | override def compare(that: SortKey): Int = { 9 | if(this.clickCount - that.clickCount != 0){ 10 | return (this.clickCount - that.clickCount).toInt 11 | }else if(this.orderCount - that.orderCount != 0){ 12 | return (this.orderCount - that.orderCount).toInt 13 | }else{ 14 | return (this.payCount - that.payCount).toInt 15 | } 16 | } 17 | } 18 | 
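// Editorial sketch (not part of the repository): SortKey extends Ordered[SortKey], so Scala
// derives an implicit Ordering for it; that is what lets serverThree rank categories with
// sortByKey(false).take(10). A minimal, self-contained illustration of the comparison order
// (clickCount first, then orderCount, then payCount), assumed to sit in the same package as SortKey:
object SortKeyDemo {
  def main(args: Array[String]): Unit = {
    val keys = Seq(
      SortKey(clickCount = 10L, orderCount = 5L, payCount = 2L),
      SortKey(clickCount = 10L, orderCount = 7L, payCount = 1L),
      SortKey(clickCount = 3L, orderCount = 9L, payCount = 9L)
    )
    // descending order: equal clickCounts fall through to orderCount, so (10,7,1) ranks
    // above (10,5,2), and (3,9,9) comes last despite its larger order/pay counts
    keys.sorted.reverse.foreach(println)
  }
}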
-------------------------------------------------------------------------------- /session/src/main/java/server/serverFive.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import commons.constant.Constants 4 | import commons.model.UserVisitAction 5 | import commons.utils.{DateUtils, ParamUtils} 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.sql.SparkSession 8 | 9 | import scala.collection.mutable 10 | import scala.collection.mutable.ListBuffer 11 | 12 | class serverFive extends Serializable { 13 | 14 | def getSkipRatio(session: SparkSession, sessionId2FilterActionRDD: RDD[(String, UserVisitAction)], taskUUID: String)={ 15 | //1.获取目标页面 16 | val pageFlow=ParamUtils.getPageFlow(); 17 | 18 | //2.聚合用户信息,获取用户页面跳转统计---countByKey---(page1_page2, count) 19 | val sessionId2GroupRDD=sessionId2FilterActionRDD.groupByKey(); 20 | val skipCountRDD=getPageSKipCount(session,pageFlow,sessionId2GroupRDD ); 21 | val pageSplitCountMap=skipCountRDD.countByKey(); 22 | //3.计算比例 23 | getPagesSkipRatio(pageSplitCountMap,session,taskUUID); 24 | 25 | 26 | } 27 | def getPagesSkipRatio(pageSplitCountMap: collection.Map[String, Long], session: SparkSession, taskUUID: String) = { 28 | val sum=pageSplitCountMap.values.sum.toDouble; 29 | val ratios=pageSplitCountMap.map{ 30 | case(k,v)=>{ 31 | val ratio=v/sum; 32 | (k,ratio); 33 | } 34 | } 35 | ratios.foreach(println); 36 | } 37 | def getPageSKipCount(sparkSession: SparkSession, 38 | targetPageFlow: Array[String], 39 | sessionId2GroupRDD: RDD[(String, Iterable[UserVisitAction])]) = { 40 | sessionId2GroupRDD.flatMap{ 41 | case(sessionId,actions)=>{ 42 | val sortedActions=actions.toList.sortWith((item1,item2)=>{ 43 | DateUtils.parseTime(item1.action_time).getTime < DateUtils.parseTime(item2.action_time).getTime 44 | }); 45 | val pages=sortedActions.map(item=>item.page_id); 46 | // pageArray.slice(0, pageArray.length - 1): [1,2,3,4,5,6] 47 | // pageArray.tail:[2,3,4,5,6,7] 48 | // zip: (1,2),(2,3).....
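// Editorial note: the single-jump keys built below use the "page1-page2" form; they only pass the
// targetPageFlow filter if ParamUtils.getPageFlow() returns entries with the same separator
// (the comment in getSkipRatio above writes them as "page1_page2"), so the two formats must agree.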
49 | val splitPages=pages.slice(0,pages.size-1).zip(pages.tail).map{ 50 | case(page1,page2)=>{ 51 | page1+"-"+page2; 52 | } 53 | } 54 | 55 | val splitPagesFilter=splitPages.filter(item=>targetPageFlow.contains(item)).map(item=>(item,1L)); 56 | splitPagesFilter 57 | } 58 | } 59 | } 60 | 61 | 62 | } 63 | -------------------------------------------------------------------------------- /session/src/main/java/server/serverFour.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import commons.constant.Constants 4 | import commons.model.{Top10Session, UserVisitAction} 5 | import commons.utils.StringUtil 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.sql.SparkSession 8 | 9 | import scala.collection.mutable 10 | 11 | class serverFour extends Serializable{ 12 | def top10ActiveSession(session: SparkSession, taskUUID: String, 13 | sessionId2FilterActionRDD: RDD[(String, UserVisitAction)], 14 | top10Category: Array[(SortKey, String)]) = { 15 | //1.获取top10热门品类的array; 16 | val top10Arr=top10Category.map{ 17 | case (sortKey,info)=>{ 18 | val cId= StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_CATEGORY_ID).toLong; 19 | cId 20 | } 21 | } 22 | //2.过滤数据 23 | val filterRDD=sessionId2FilterActionRDD.filter{ 24 | case (sessionId,action)=>{ 25 | val cId=action.click_category_id; 26 | top10Arr.contains(cId); 27 | } 28 | } 29 | //3.根据sessionId分组聚合,统计每个用户对每个品类的点击次数,最后结构为(categoryId,sessionId=count) 30 | val GroupFilterRDD=filterRDD.groupByKey(); 31 | val cid2SessionCountRDD=GroupFilterRDD.flatMap{ 32 | case(sessionId,actions)=>{ 33 | val countMap=new mutable.HashMap[Long,Long]; 34 | for(action<-actions){ 35 | val cId=action.click_category_id; 36 | if(!countMap.contains(cId)){ 37 | countMap+=(cId->0) 38 | } 39 | countMap.update(cId,countMap(cId)+1); 40 | } 41 | for((k,v)<-countMap) 42 | yield(k,sessionId+"="+v); 43 | } 44 | } 45 | //4.groupByKey分组聚合 46 | val cid2GroupRDD=cid2SessionCountRDD.groupByKey(); 47 | //5.对每个cid对应的列表进行排序操作 48 | val top10ActiveSession=cid2GroupRDD.flatMap{ 49 | case (cid, iterableSessionCount) => 50 | // true: item1放在前面 51 | // false: item2放在前面 52 | // item: sessionCount String "sessionId=count" 53 | val sortList = iterableSessionCount.toList.sortWith((item1, item2) => { 54 | item1.split("=")(1).toLong > item2.split("=")(1).toLong 55 | }).take(10) 56 | 57 | val top10Session = sortList.map{ 58 | // item : sessionCount String "sessionId=count" 59 | case item => 60 | val sessionId = item.split("=")(0) 61 | val count = item.split("=")(1).toLong 62 | Top10Session(taskUUID, cid, sessionId, count) 63 | } 64 | 65 | top10Session 66 | } 67 | top10ActiveSession.foreach(println); 68 | top10ActiveSession; 69 | //6.写入数据库 70 | /* import sparkSession.implicits._ 71 | top10SessionRDD.toDF().write 72 | .format("jdbc") 73 | .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL)) 74 | .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER)) 75 | .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD)) 76 | .option("dbtable", "top10_session_0308") 77 | .mode(SaveMode.Append) 78 | .save()*/ 79 | 80 | 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /session/src/main/java/server/serverOne.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import java.util.Date 4 | 5 | import com.alibaba.fastjson.JSONObject 6 | import
commons.conf.ConfigurationManager 7 | import commons.constant.Constants 8 | import commons.model.{SessionAggrStat, UserInfo, UserVisitAction} 9 | import commons.utils.{DateUtils, NumberUtils, StringUtil, ValidUtils} 10 | import org.apache.commons.lang.StringUtils 11 | import org.apache.spark.rdd.RDD 12 | import org.apache.spark.sql.{SaveMode, SparkSession} 13 | import org.spark_project.jetty.server.Authentication.User 14 | 15 | import scala.collection.mutable 16 | 17 | class serverOne extends Serializable { 18 | def getSessionRatio(sparkSession: SparkSession,taskUUID:String, FilterInfo: RDD[(String, String)], value:mutable.HashMap[String,Int]) = { 19 | val session_count = value.getOrElse(Constants.SESSION_COUNT, 1).toDouble 20 | 21 | val visit_length_1s_3s = value.getOrElse(Constants.TIME_PERIOD_1s_3s, 0) 22 | val visit_length_4s_6s = value.getOrElse(Constants.TIME_PERIOD_4s_6s, 0) 23 | val visit_length_7s_9s = value.getOrElse(Constants.TIME_PERIOD_7s_9s, 0) 24 | val visit_length_10s_30s = value.getOrElse(Constants.TIME_PERIOD_10s_30s, 0) 25 | val visit_length_30s_60s = value.getOrElse(Constants.TIME_PERIOD_30s_60s, 0) 26 | val visit_length_1m_3m = value.getOrElse(Constants.TIME_PERIOD_1m_3m, 0) 27 | val visit_length_3m_10m = value.getOrElse(Constants.TIME_PERIOD_3m_10m, 0) 28 | val visit_length_10m_30m = value.getOrElse(Constants.TIME_PERIOD_10m_30m, 0) 29 | val visit_length_30m = value.getOrElse(Constants.TIME_PERIOD_30m, 0) 30 | 31 | val step_length_1_3 = value.getOrElse(Constants.STEP_PERIOD_1_3, 0) 32 | val step_length_4_6 = value.getOrElse(Constants.STEP_PERIOD_4_6, 0) 33 | val step_length_7_9 = value.getOrElse(Constants.STEP_PERIOD_7_9, 0) 34 | val step_length_10_30 = value.getOrElse(Constants.STEP_PERIOD_10_30, 0) 35 | val step_length_30_60 = value.getOrElse(Constants.STEP_PERIOD_30_60, 0) 36 | val step_length_60 = value.getOrElse(Constants.STEP_PERIOD_60, 0) 37 | 38 | val visit_length_1s_3s_ratio = NumberUtils.formatDouble(visit_length_1s_3s / session_count, 2) 39 | val visit_length_4s_6s_ratio = NumberUtils.formatDouble(visit_length_4s_6s / session_count, 2) 40 | val visit_length_7s_9s_ratio = NumberUtils.formatDouble(visit_length_7s_9s / session_count, 2) 41 | val visit_length_10s_30s_ratio = NumberUtils.formatDouble(visit_length_10s_30s / session_count, 2) 42 | val visit_length_30s_60s_ratio = NumberUtils.formatDouble(visit_length_30s_60s / session_count, 2) 43 | val visit_length_1m_3m_ratio = NumberUtils.formatDouble(visit_length_1m_3m / session_count, 2) 44 | val visit_length_3m_10m_ratio = NumberUtils.formatDouble(visit_length_3m_10m / session_count, 2) 45 | val visit_length_10m_30m_ratio = NumberUtils.formatDouble(visit_length_10m_30m / session_count, 2) 46 | val visit_length_30m_ratio = NumberUtils.formatDouble(visit_length_30m / session_count, 2) 47 | 48 | val step_length_1_3_ratio = NumberUtils.formatDouble(step_length_1_3 / session_count, 2) 49 | val step_length_4_6_ratio = NumberUtils.formatDouble(step_length_4_6 / session_count, 2) 50 | val step_length_7_9_ratio = NumberUtils.formatDouble(step_length_7_9 / session_count, 2) 51 | val step_length_10_30_ratio = NumberUtils.formatDouble(step_length_10_30 / session_count, 2) 52 | val step_length_30_60_ratio = NumberUtils.formatDouble(step_length_30_60 / session_count, 2) 53 | val step_length_60_ratio = NumberUtils.formatDouble(step_length_60 / session_count, 2) 54 | 55 | //数据封装 56 | val stat = SessionAggrStat(taskUUID, session_count.toInt, visit_length_1s_3s_ratio, visit_length_4s_6s_ratio, visit_length_7s_9s_ratio, 57 | 
visit_length_10s_30s_ratio, visit_length_30s_60s_ratio, visit_length_1m_3m_ratio, 58 | visit_length_3m_10m_ratio, visit_length_10m_30m_ratio, visit_length_30m_ratio, 59 | step_length_1_3_ratio, step_length_4_6_ratio, step_length_7_9_ratio, 60 | step_length_10_30_ratio, step_length_30_60_ratio, step_length_60_ratio) 61 | 62 | val sessionRatioRDD = sparkSession.sparkContext.makeRDD(Array(stat)) 63 | 64 | //写入数据库 65 | import sparkSession.implicits._ 66 | sessionRatioRDD.toDF().write 67 | .format("jdbc") 68 | .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL)) 69 | .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER)) 70 | .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD)) 71 | .option("dbtable", "session_stat_ratio_0416") 72 | .mode(SaveMode.Append) 73 | .save() 74 | sessionRatioRDD; 75 | } 76 | 77 | def filterInfo(finalInfo: RDD[(String, String)],task:JSONObject,accumulator:sessionAccumulator) = { 78 | //1.获取限制条件 79 | //获取限制条件的基本信息 80 | val startAge = task.get(Constants.PARAM_START_AGE); 81 | val endAge = task.get( Constants.PARAM_END_AGE); 82 | val professionals = task.get(Constants.PARAM_PROFESSIONALS) 83 | val cities = task.get(Constants.PARAM_CITIES) 84 | val sex = task.get(Constants.PARAM_SEX) 85 | val keywords =task.get(Constants.PARAM_KEYWORDS) 86 | val categoryIds = task.get(Constants.PARAM_CATEGORY_IDS) 87 | 88 | //拼接基本条件 89 | var filterInfo = (if(startAge != null) Constants.PARAM_START_AGE + "=" + startAge + "|" else "") + 90 | (if (endAge != null) Constants.PARAM_END_AGE + "=" + endAge + "|" else "") + 91 | (if (professionals != null) Constants.PARAM_PROFESSIONALS + "=" + professionals + "|" else "") + 92 | (if (cities != null) Constants.PARAM_CITIES + "=" + cities + "|" else "") + 93 | (if (sex != null) Constants.PARAM_SEX + "=" + sex + "|" else "") + 94 | (if (keywords != null) Constants.PARAM_KEYWORDS + "=" + keywords + "|" else "") + 95 | (if (categoryIds != null) Constants.PARAM_CATEGORY_IDS + "=" + categoryIds else "") 96 | 97 | if(filterInfo.endsWith("\\|")) 98 | filterInfo = filterInfo.substring(0, filterInfo.length - 1) 99 | 100 | finalInfo.filter{ 101 | case (sessionId,fullInfo)=>{ 102 | var success=true; 103 | if(!ValidUtils.between(fullInfo, Constants.FIELD_AGE, filterInfo, Constants.PARAM_START_AGE, Constants.PARAM_END_AGE)){ 104 | success = false 105 | }else if(!ValidUtils.in(fullInfo, Constants.FIELD_PROFESSIONAL, filterInfo, Constants.PARAM_PROFESSIONALS)){ 106 | success = false 107 | }else if(!ValidUtils.in(fullInfo, Constants.FIELD_CITY, filterInfo, Constants.PARAM_CITIES)){ 108 | success = false 109 | }else if(!ValidUtils.equal(fullInfo, Constants.FIELD_SEX, filterInfo, Constants.PARAM_SEX)){ 110 | success = false 111 | }else if(!ValidUtils.in(fullInfo, Constants.FIELD_SEARCH_KEYWORDS, filterInfo, Constants.PARAM_KEYWORDS)){ 112 | success = false 113 | }else if(!ValidUtils.in(fullInfo, Constants.FIELD_CLICK_CATEGORY_IDS, filterInfo, Constants.PARAM_CATEGORY_IDS)){ 114 | success = false 115 | } 116 | //跟新累加器 117 | if (success){ 118 | //先累加总的session数量 119 | accumulator.add(Constants.SESSION_COUNT); 120 | val visitLength=StringUtil.getFieldFromConcatString(fullInfo,"\\|",Constants.FIELD_VISIT_LENGTH).toLong; 121 | val stepLength=StringUtil.getFieldFromConcatString(fullInfo,"\\|",Constants.FIELD_STEP_LENGTH).toLong; 122 | 123 | calculateVisitLength(visitLength,accumulator); 124 | calculateStepLength(stepLength,accumulator); 125 | } 126 | success; 127 | } 128 | } 129 | } 130 | def 
calculateVisitLength(visitLength: Long, sessionStatisticAccumulator: sessionAccumulator) = { 131 | if(visitLength >= 1 && visitLength <= 3){ 132 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_1s_3s) 133 | }else if(visitLength >=4 && visitLength <= 6){ 134 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_4s_6s) 135 | }else if (visitLength >= 7 && visitLength <= 9) { 136 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_7s_9s) 137 | } else if (visitLength >= 10 && visitLength <= 30) { 138 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_10s_30s) 139 | } else if (visitLength > 30 && visitLength <= 60) { 140 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_30s_60s) 141 | } else if (visitLength > 60 && visitLength <= 180) { 142 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_1m_3m) 143 | } else if (visitLength > 180 && visitLength <= 600) { 144 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_3m_10m) 145 | } else if (visitLength > 600 && visitLength <= 1800) { 146 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_10m_30m) 147 | } else if (visitLength > 1800) { 148 | sessionStatisticAccumulator.add(Constants.TIME_PERIOD_30m) 149 | } 150 | } 151 | 152 | def calculateStepLength(stepLength: Long, sessionStatisticAccumulator: sessionAccumulator) = { 153 | if(stepLength >=1 && stepLength <=3){ 154 | sessionStatisticAccumulator.add(Constants.STEP_PERIOD_1_3) 155 | }else if (stepLength >= 4 && stepLength <= 6) { 156 | sessionStatisticAccumulator.add(Constants.STEP_PERIOD_4_6) 157 | } else if (stepLength >= 7 && stepLength <= 9) { 158 | sessionStatisticAccumulator.add(Constants.STEP_PERIOD_7_9) 159 | } else if (stepLength >= 10 && stepLength <= 30) { 160 | sessionStatisticAccumulator.add(Constants.STEP_PERIOD_10_30) 161 | } else if (stepLength > 30 && stepLength <= 60) { 162 | sessionStatisticAccumulator.add(Constants.STEP_PERIOD_30_60) 163 | } else if (stepLength > 60) { 164 | sessionStatisticAccumulator.add(Constants.STEP_PERIOD_60) 165 | } 166 | } 167 | def getUserInfo(session: SparkSession) = { 168 | import session.implicits._; 169 | val ds=session.read.parquet("hdfs://hadoop1:9000/data/user_Info").as[UserInfo].map(item=>(item.user_id,item)); 170 | ds.rdd; 171 | } 172 | 173 | def basicActions(session:SparkSession,task:JSONObject)={ 174 | import session.implicits._; 175 | val df=session.read.parquet("hdfs://hadoop1:9000/data/user_visit_action").as[UserVisitAction]; 176 | df.filter(item=>{ 177 | val date=item.action_time; 178 | val start=task.getString(Constants.PARAM_START_DATE); 179 | val end=task.getString(Constants.PARAM_END_DATE); 180 | date>=start&&date<=end; 181 | }) 182 | df.rdd; 183 | } 184 | 185 | def AggActionGroup(groupBasicActions: RDD[(String, Iterable[UserVisitAction])])={ 186 | groupBasicActions.map{ 187 | case (sessionId,actions)=>{ 188 | var userId = -1L 189 | 190 | var startTime:Date = null 191 | var endTime:Date = null 192 | 193 | var stepLength = 0 194 | 195 | val searchKeywords = new StringBuffer("") 196 | val clickCategories = new StringBuffer("") 197 | 198 | //循环遍历actions,更新信息 199 | for (action<-actions){ 200 | if(userId == -1L){ 201 | userId=action.user_id; 202 | } 203 | val time=DateUtils.parseTime(action.action_time); 204 | if (startTime==null||startTime.after(time))startTime=time; 205 | if (endTime==null||endTime.before(time))endTime=time; 206 | 207 | val key=action.search_keyword; 208 | 209 | if (!StringUtils.isEmpty(key) && !searchKeywords.toString.contains(key))searchKeywords.append(key+","); 210 | 211 | val 
click=action.click_category_id; 212 | if ( click!= -1L && clickCategories.toString.contains(click))searchKeywords.append(click+","); 213 | 214 | stepLength+=1; 215 | 216 | } 217 | // searchKeywords.toString.substring(0, searchKeywords.toString.length) 218 | val searchKw = StringUtil.trimComma(searchKeywords.toString) 219 | val clickCg = StringUtil.trimComma(clickCategories.toString) 220 | 221 | val visitLength = (endTime.getTime - startTime.getTime) / 1000 222 | 223 | val aggrInfo = Constants.FIELD_SESSION_ID + "=" + sessionId + "|" + 224 | Constants.FIELD_SEARCH_KEYWORDS + "=" + searchKw + "|" + 225 | Constants.FIELD_CLICK_CATEGORY_IDS + "=" + clickCg + "|" + 226 | Constants.FIELD_VISIT_LENGTH + "=" + visitLength + "|" + 227 | Constants.FIELD_STEP_LENGTH + "=" + stepLength + "|" + 228 | Constants.FIELD_START_TIME + "=" + DateUtils.formatTime(startTime) 229 | 230 | (userId, aggrInfo) 231 | 232 | } 233 | } 234 | } 235 | 236 | def AggInfoAndActions(aggUserActions: RDD[(Long, String)], userInfo: RDD[(Long, UserInfo)])={ 237 | //根据user_id建立映射关系===>用Join算子 238 | userInfo.join(aggUserActions).map{ 239 | case (userId,(userInfo: UserInfo,aggrInfo))=>{ 240 | val age = userInfo.age 241 | val professional = userInfo.professional 242 | val sex = userInfo.sex 243 | val city = userInfo.city 244 | val fullInfo = aggrInfo + "|" + 245 | Constants.FIELD_AGE + "=" + age + "|" + 246 | Constants.FIELD_PROFESSIONAL + "=" + professional + "|" + 247 | Constants.FIELD_SEX + "=" + sex + "|" + 248 | Constants.FIELD_CITY + "=" + city 249 | 250 | val sessionId = StringUtil.getFieldFromConcatString(aggrInfo, "\\|", Constants.FIELD_SESSION_ID) 251 | 252 | (sessionId, fullInfo) 253 | } 254 | } 255 | } 256 | 257 | } 258 | -------------------------------------------------------------------------------- /session/src/main/java/server/serverThree.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import commons.constant.Constants 4 | import commons.model.{Top10Category, UserVisitAction} 5 | import commons.utils.StringUtil 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.sql.SparkSession 8 | 9 | import scala.collection.mutable.ArrayBuffer 10 | 11 | class serverThree extends Serializable { 12 | 13 | 14 | 15 | 16 | def top10PopularCategories(sparkSession: SparkSession, 17 | taskUUID: String, 18 | sessionId2FilterActionRDD: RDD[(String, UserVisitAction)])={ 19 | //1.将所有基本数据,转化成(cId,cId)格式的总数据 20 | var cid2CidRdd=sessionId2FilterActionRDD.flatMap{ 21 | case(sessionId,action: UserVisitAction)=>{ 22 | val categoryBuffer=new ArrayBuffer[(Long,Long)](); 23 | // 点击行为 24 | if(action.click_category_id != -1){ 25 | categoryBuffer += ((action.click_category_id, action.click_category_id)) 26 | }else if(action.order_category_ids != null){ 27 | for(orderCid <- action.order_category_ids.split(",")) 28 | categoryBuffer += ((orderCid.toLong, orderCid.toLong)) 29 | }else if(action.pay_category_ids != null){ 30 | for(payCid <- action.pay_category_ids.split(",")) 31 | categoryBuffer += ((payCid.toLong, payCid.toLong)) 32 | } 33 | categoryBuffer 34 | } 35 | } 36 | cid2CidRdd=cid2CidRdd.distinct(); 37 | // 第二步:统计品类的点击次数、下单次数、付款次数 38 | val cid2ClickCountRDD = getClickCount(sessionId2FilterActionRDD) 39 | 40 | val cid2OrderCountRDD = getOrderCount(sessionId2FilterActionRDD) 41 | 42 | val cid2PayCountRDD = getPayCount(sessionId2FilterActionRDD) 43 | 44 | //3.根据左连接,将总的数据cid2CidRdd和第二部得到的数据一个个进行连接,创造出cid:str 45 | //其中,str代表count=32|order=15....... 
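// Editorial note: getFullCount below chains three leftOuterJoins, building per-category strings such as
// (illustrative, assuming the Constants.FIELD_* names) "categoryid=20|clickCount=32|orderCount=15|payCount=7".
// Beware that its clickCount branch is written as `if (option.isDefined) option.getOrElse(0)` with no else,
// which evaluates to Unit for categories that were never clicked; `if (option.isDefined) option.get else 0`,
// as used for the order and pay counts, keeps the later .toLong parsing safe.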
46 | val cid2FullCountRDD = getFullCount(cid2CidRdd,cid2ClickCountRDD,cid2OrderCountRDD,cid2PayCountRDD); 47 | 48 | //4.自定义排序器,将数据转化为(sortKey,info) 49 | val sortRDD=cid2FullCountRDD.map{ 50 | case (cId,info)=>{ 51 | val clickCount = StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_CLICK_COUNT).toLong 52 | val orderCount = StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_ORDER_COUNT).toLong 53 | val payCount = StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_PAY_COUNT).toLong 54 | 55 | val sortKey = SortKey(clickCount, orderCount, payCount) 56 | (sortKey, info) 57 | } 58 | } 59 | //5.排序 60 | val top10=sortRDD.sortByKey(false).take(10); 61 | //6.封装数据,写进数据库 62 | val top10CategoryRDD = sparkSession.sparkContext.makeRDD(top10).map{ 63 | case (sortKey, countInfo) => 64 | val cid = StringUtil.getFieldFromConcatString(countInfo, "\\|", Constants.FIELD_CATEGORY_ID).toLong 65 | val clickCount = sortKey.clickCount 66 | val orderCount = sortKey.orderCount 67 | val payCount = sortKey.payCount 68 | Top10Category(taskUUID, cid, clickCount, orderCount, payCount) 69 | } 70 | 71 | //保存到数据库 72 | /* import sparkSession.implicits._ 73 | top10CategoryRDD.toDF().write 74 | .format("jdbc") 75 | .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL)) 76 | .option("user", ConfigurationManager.config.getString(Constants.JDBC_USER)) 77 | .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD)) 78 | .option("dbtable", "top10_category_0308") 79 | .mode(SaveMode.Append) 80 | .save*/ 81 | top10 82 | 83 | } 84 | 85 | def getFullCount(cid2CidRDD: RDD[(Long, Long)], cid2ClickCountRDD: RDD[(Long, Long)], cid2OrderCountRDD: RDD[(Long, Long)], cid2PayCountRDD: RDD[(Long, Long)]) = { 86 | val cid2ClickInfoRDD=cid2CidRDD.leftOuterJoin(cid2ClickCountRDD).map{ 87 | case (cId,(categoryId,option))=>{ 88 | val clickCount=if (option.isDefined)option.getOrElse(0); 89 | val aggrCount = Constants.FIELD_CATEGORY_ID + "=" + cId + "|" + 90 | Constants.FIELD_CLICK_COUNT + "=" + clickCount 91 | 92 | (cId, aggrCount) 93 | } 94 | } 95 | val cid2OrderInfoRDD = cid2ClickInfoRDD.leftOuterJoin(cid2OrderCountRDD).map{ 96 | case (cid, (clickInfo, option)) => 97 | val orderCount = if(option.isDefined) option.get else 0 98 | val aggrInfo = clickInfo + "|" + 99 | Constants.FIELD_ORDER_COUNT + "=" + orderCount 100 | 101 | (cid, aggrInfo) 102 | } 103 | 104 | val cid2PayInfoRDD = cid2OrderInfoRDD.leftOuterJoin(cid2PayCountRDD).map{ 105 | case (cid, (orderInfo, option)) => 106 | val payCount = if(option.isDefined) option.get else 0 107 | val aggrInfo = orderInfo + "|" + 108 | Constants.FIELD_PAY_COUNT + "=" + payCount 109 | (cid, aggrInfo) 110 | } 111 | cid2PayInfoRDD; 112 | 113 | } 114 | 115 | 116 | def getClickCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)])={ 117 | val clickFilterRDD=sessionId2FilterActionRDD.filter{ 118 | case (sessionId,action: UserVisitAction)=>{ 119 | action.click_category_id != -1L; 120 | } 121 | } 122 | val clickNumRDD = clickFilterRDD.map{ 123 | case (sessionId, action) => (action.click_category_id, 1L) 124 | } 125 | 126 | clickNumRDD.reduceByKey(_+_) 127 | } 128 | def getOrderCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)])={ 129 | val orderFilterRDD=sessionId2FilterActionRDD.filter(item=>item._2.order_category_ids!=null) 130 | val orderNumRDD=orderFilterRDD.flatMap{ 131 | case (sessionId,action)=>{ 132 | 133 | for(id<-action.order_category_ids.split(",")){ 134 | 135 | } 136 | 
action.order_category_ids.split(",").map(item=>(item.toLong,1L)); 137 | } 138 | } 139 | orderNumRDD.reduceByKey(_+_); 140 | } 141 | def getPayCount(sessionId2FilterActionRDD: RDD[(String, UserVisitAction)]) = { 142 | val payFilterRDD = sessionId2FilterActionRDD.filter(item => item._2.pay_category_ids != null) 143 | 144 | val payNumRDD = payFilterRDD.flatMap{ 145 | case (sid, action) => 146 | action.pay_category_ids.split(",").map(item => (item.toLong, 1L)) 147 | } 148 | 149 | payNumRDD.reduceByKey(_+_) 150 | } 151 | 152 | } 153 | -------------------------------------------------------------------------------- /session/src/main/java/server/serverTwo.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import com.alibaba.fastjson.JSONObject 4 | import commons.conf.ConfigurationManager 5 | import commons.constant.Constants 6 | import commons.model.SessionRandomExtract 7 | import commons.utils.{DateUtils, StringUtil} 8 | import org.apache.spark.rdd.RDD 9 | import org.apache.spark.sql.{SaveMode, SparkSession} 10 | 11 | import scala.collection.mutable 12 | import scala.collection.mutable.{ArrayBuffer, ListBuffer} 13 | import scala.util.Random 14 | 15 | class serverTwo extends Serializable { 16 | 17 | 18 | def generateRandomIndexList(extractDay: Int, oneDay: Long, hourCountMap: mutable.HashMap[String, Long], hourListMap: mutable.HashMap[String, ListBuffer[Int]])={ 19 | //计算每个小时要抽取多少条数据 20 | for ((hour,cnt)<-hourCountMap){ 21 | val curHour=((cnt/oneDay)*extractDay).toInt; 22 | val Random=new Random(); 23 | hourListMap.get(hour) match { 24 | case None => hourListMap(hour)=new ListBuffer[Int]; 25 | for (i<-0 until curHour.toInt){ 26 | var index=Random.nextInt(cnt.toInt); 27 | while(hourListMap(hour).contains(index)){ 28 | index=Random.nextInt(cnt.toInt); 29 | } 30 | hourListMap(hour).append(index); 31 | } 32 | 33 | case Some(value) => 34 | for (i<-0 until curHour.toInt){ 35 | var index=Random.nextInt(cnt.toInt); 36 | while(hourListMap(hour).contains(index)){ 37 | index=Random.nextInt(cnt.toInt); 38 | } 39 | hourListMap(hour).append(index); 40 | 41 | } 42 | } 43 | } 44 | } 45 | 46 | def GetextraSession(session: SparkSession, filterInfo: RDD[(String,String)], task: JSONObject, taskUUID: String)={ 47 | //1.数据格式转化成(date,info) 48 | val dateHour2FullInfoRDD=filterInfo.map{ 49 | case (sessionId,info)=>{ 50 | val date1=StringUtil.getFieldFromConcatString(info, "\\|", Constants.FIELD_START_TIME) 51 | val date=DateUtils.getDateHour(date1); 52 | (date,info); 53 | } 54 | } 55 | //2.统计同一时间总共的session数量,结果为map结构 56 | val hourCountMap=dateHour2FullInfoRDD.countByKey(); 57 | 58 | //3.将数据转化为date->map(hour,count)类型 59 | val dataHourCount=new mutable.HashMap[String,mutable.HashMap[String,Long]]; 60 | for ((k,v)<-hourCountMap){ 61 | val day=k.split("_")(0); 62 | val hour=k.split("_")(1); 63 | dataHourCount.get(day) match { 64 | case None =>dataHourCount(day)=new mutable.HashMap[String,Long]; 65 | dataHourCount(day)+=(hour->v); 66 | case Some(value) => 67 | dataHourCount(day)+=(hour->v); 68 | } 69 | } 70 | //4.获取抽取session的索引,用map(date,map(hour,list))来存储 71 | val ExtractIndexListMap=new mutable.HashMap[String,mutable.HashMap[String,ListBuffer[Int]]]; 72 | val sumday=dataHourCount.size; 73 | val extractDay=100/sumday;//平均每天 74 | 75 | for ((day,map)<-dataHourCount){ 76 | val oneDay=map.values.sum; 77 | ExtractIndexListMap.get(day) match { 78 | case None => ExtractIndexListMap(day)=new mutable.HashMap[String, ListBuffer[Int]] 79 | 
generateRandomIndexList(extractDay, oneDay, map, ExtractIndexListMap(day)) 80 | case Some(value) => 81 | generateRandomIndexList(extractDay, oneDay, map, ExtractIndexListMap(day)) 82 | } 83 | } 84 | /* 85 | 到目前,我们已经得到了: 86 | 1.每一个小时里总共有多少条session->dataHourCount 87 | 2.每一个小时要抽取的session的索引->ExtractIndexListMap 88 | */ 89 | //5.根据ExtractIndexListMap抽取session 90 | val dateHour2GroupRDD = dateHour2FullInfoRDD.groupByKey() 91 | val extractSessionRDD=dateHour2GroupRDD.flatMap{ 92 | case (dateHour,iterableFullInfo)=>{ 93 | val day = dateHour.split("_")(0) 94 | val hour = dateHour.split("_")(1) 95 | val indexList=ExtractIndexListMap.get(day).get(hour); 96 | val extractSessionArrayBuffer = new ArrayBuffer[SessionRandomExtract]() 97 | 98 | var index = 0 99 | 100 | for(fullInfo <- iterableFullInfo){ 101 | if(indexList.contains(index)){ 102 | val sessionId = StringUtil.getFieldFromConcatString(fullInfo, "\\|", Constants.FIELD_SESSION_ID) 103 | val startTime = StringUtil.getFieldFromConcatString(fullInfo, "\\|",Constants.FIELD_START_TIME) 104 | val searchKeywords = StringUtil.getFieldFromConcatString(fullInfo, "\\|", Constants.FIELD_SEARCH_KEYWORDS) 105 | val clickCategories = StringUtil.getFieldFromConcatString(fullInfo, "\\|", Constants.FIELD_CLICK_CATEGORY_IDS) 106 | 107 | val extractSession = SessionRandomExtract(taskUUID , sessionId, startTime, searchKeywords, clickCategories) 108 | 109 | extractSessionArrayBuffer += extractSession 110 | } 111 | index += 1 112 | } 113 | extractSessionArrayBuffer 114 | 115 | } 116 | } 117 | extractSessionRDD.foreach(println); 118 | //6.写进数据库 119 | /*import session.implicits._; 120 | extractSessionRDD.toDF().write 121 | .format("jdbc") 122 | .option("url", ConfigurationManager.config.getString(Constants.JDBC_URL)) 123 | .option("user",ConfigurationManager.config.getString(Constants.JDBC_USER)) 124 | .option("password", ConfigurationManager.config.getString(Constants.JDBC_PASSWORD)) 125 | .option("dbtable", "session_extract_0308") 126 | .mode(SaveMode.Append) 127 | .save()*/ 128 | } 129 | 130 | } 131 | -------------------------------------------------------------------------------- /session/target/classes/META-INF/session.kotlin_module: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /session/target/classes/scala/sessionAccumulator.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/scala/sessionAccumulator.class -------------------------------------------------------------------------------- /session/target/classes/scala/sessionStat$.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/scala/sessionStat$.class -------------------------------------------------------------------------------- /session/target/classes/scala/sessionStat.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/scala/sessionStat.class -------------------------------------------------------------------------------- /session/target/classes/server/SortKey$.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/SortKey$.class -------------------------------------------------------------------------------- /session/target/classes/server/SortKey.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/SortKey.class -------------------------------------------------------------------------------- /session/target/classes/server/serverFive.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverFive.class -------------------------------------------------------------------------------- /session/target/classes/server/serverFour.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverFour.class -------------------------------------------------------------------------------- /session/target/classes/server/serverOne$$typecreator4$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverOne$$typecreator4$1.class -------------------------------------------------------------------------------- /session/target/classes/server/serverOne$$typecreator4$2.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverOne$$typecreator4$2.class -------------------------------------------------------------------------------- /session/target/classes/server/serverOne$$typecreator5$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverOne$$typecreator5$1.class -------------------------------------------------------------------------------- /session/target/classes/server/serverOne$$typecreator5$2.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverOne$$typecreator5$2.class -------------------------------------------------------------------------------- /session/target/classes/server/serverOne.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverOne.class -------------------------------------------------------------------------------- /session/target/classes/server/serverThree.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverThree.class -------------------------------------------------------------------------------- /session/target/classes/server/serverTwo.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongyupei/spark-shopAnalyze/6631b38a4858ceec792e72f4cfc05f1e00f8d90e/session/target/classes/server/serverTwo.class --------------------------------------------------------------------------------
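Editorial appendix (not part of the repository): MockRealTimeData above keeps publishing records of the form "timestamp province city userid adid" to the Kafka topic configured in commerce.properties, and the adverStat module is the consuming side. A minimal sketch of such a consumer, assuming the spark-streaming-kafka-0-10 dependency declared in the poms above; the broker address, group id, and topic name used here are placeholders to be replaced with kafka.broker.list / kafka.topics:

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object AdClickStreamSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("AdClickStreamSketch").setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(5))

    // placeholder values: in the project these come from ConfigurationManager / commerce.properties
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "adverStatGroup",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // direct stream over the mock topic; each record value is "timestamp province city userid adid"
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array("AdRealTimeLog"), kafkaParams))

    stream.map(_.value()).print()

    ssc.start()
    ssc.awaitTermination()
  }
}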