├── .settings
│   └── org.eclipse.m2e.core.prefs
├── 01.png
├── 02.png
├── 03.png
├── 04.png
├── 05.png
├── 06.png
├── 07.png
├── 08.png
├── 09.png
├── README.md
├── Recommendation
│   ├── .settings
│   │   └── org.eclipse.m2e.core.prefs
│   ├── ContentRecommendation
│   │   ├── .settings
│   │   │   ├── org.eclipse.jdt.core.prefs
│   │   │   └── org.eclipse.m2e.core.prefs
│   │   ├── pom.xml
│   │   └── src
│   │       └── main
│   │           ├── resources
│   │           │   └── log4j.properties
│   │           └── scala
│   │               └── com
│   │                   └── z
│   │                       └── content
│   │                           └── ContentRecommender.scala
│   ├── ItemCFRecommendation
│   │   ├── pom.xml
│   │   └── src
│   │       └── main
│   │           ├── resources
│   │           │   └── log4j.properties
│   │           └── scala
│   │               └── com
│   │                   └── z
│   │                       └── itemcf
│   │                           └── ItemCFRecommender.scala
│   ├── KafkaStream
│   │   ├── .settings
│   │   │   ├── org.eclipse.core.resources.prefs
│   │   │   ├── org.eclipse.jdt.core.prefs
│   │   │   └── org.eclipse.m2e.core.prefs
│   │   ├── pom.xml
│   │   └── src
│   │       └── main
│   │           ├── java
│   │           │   └── com
│   │           │       └── z
│   │           │           └── kafkastream
│   │           │               ├── Application.java
│   │           │               ├── LogProcessor.java
│   │           │               └── MyEventTimeExtractor.java
│   │           └── resources
│   │               └── log4j.properties
│   ├── OfflineRecommendation
│   │   ├── .settings
│   │   │   ├── org.eclipse.jdt.core.prefs
│   │   │   └── org.eclipse.m2e.core.prefs
│   │   ├── pom.xml
│   │   └── src
│   │       └── main
│   │           ├── resources
│   │           │   └── log4j.properties
│   │           └── scala
│   │               └── com
│   │                   └── z
│   │                       └── offline
│   │                           ├── ALSTrainer.scala
│   │                           └── OfflineRecommender.scala
│   ├── StatisticsRecommendation
│   │   ├── .settings
│   │   │   ├── org.eclipse.jdt.core.prefs
│   │   │   └── org.eclipse.m2e.core.prefs
│   │   ├── pom.xml
│   │   └── src
│   │       └── main
│   │           ├── resources
│   │           │   └── log4j.properties
│   │           └── scala
│   │               └── com
│   │                   └── z
│   │                       └── statistics
│   │                           └── StatisticsRecommender.scala
│   ├── StreamingRecommendation
│   │   ├── .settings
│   │   │   ├── org.eclipse.jdt.core.prefs
│   │   │   └── org.eclipse.m2e.core.prefs
│   │   ├── pom.xml
│   │   └── src
│   │       └── main
│   │           ├── resources
│   │           │   └── log4j.properties
│   │           └── scala
│   │               └── com
│   │                   └── z
│   │                       └── streaming
│   │                           └── StreamingRecommender.scala
│   └── pom.xml
├── pom.xml
└── readme.docx
/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 |
--------------------------------------------------------------------------------
/01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/01.png
--------------------------------------------------------------------------------
/02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/02.png
--------------------------------------------------------------------------------
/03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/03.png
--------------------------------------------------------------------------------
/04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/04.png
--------------------------------------------------------------------------------
/05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/05.png
--------------------------------------------------------------------------------
/06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/06.png
--------------------------------------------------------------------------------
/07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/07.png
--------------------------------------------------------------------------------
/08.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/08.png
--------------------------------------------------------------------------------
/09.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/09.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
***Source code: https://github.com/wolf-song-ml/RecommendationSystem***

## Practice

## 1 Project architecture
![Project architecture](https://github.com/wolf-song-ml/RecommendationSystem/blob/master/01.png)

## 2 Key technologies in this project

- Redis: stores each user's queue of most recent ratings
- MongoDB: data store, queried for BI visualization
- Elasticsearch: fuzzy keyword search over text fields, exact-match search by category, and the More Like This API for content-based recommendation
- Flume: real-time collection of rating events
- Kafka: message channel for the collected data; Kafka Streams: intermediate pipe that filters and forwards messages
- Spark: Spark SQL, Spark Streaming and Spark MLlib, used for statistics, as the data-loading engine, and for the machine-learning models
- ScalaNLP (jblas): Java matrix computation

## Theory

## 1 Why recommender systems: solving information overload

- The search-engine era

  Category directories: Yahoo
  Search: Google, Baidu

- The personalization era (raise user stickiness, grow revenue)

  The system recommends relevant items automatically: Toutiao, Douban, e-commerce sites

## 2 Types of recommender systems

- Demographic-based recommendation
- Content-based recommendation
- Collaborative-filtering recommendation

## 3 Demographic-based recommendation

Demographic-based recommendation is the easiest mechanism to implement: it measures how related users are using only their basic profile data, then recommends to the current user the items that similar users liked.
![Demographic-based recommendation](https://upload-images.jianshu.io/upload_images/21415382-be2ff3b26716dbe1?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

## 4 Content-based recommendation

## 4.1 Definition

Content-based recommendation was the most widely used mechanism when recommendation engines first appeared. Its core idea: use the metadata of items or content to discover how items relate to one another, then recommend items similar to what the user has liked before.

## 4.2 Algorithm flow

- Feature extraction for items: tags
- Feature extraction for text: keywords
- Build the tokenized feature-vector matrix
- Compute similarity; cosine similarity is the usual choice, shown below
![Cosine similarity formula](https://upload-images.jianshu.io/upload_images/21415382-d47a3e4a0aa0dc2f?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
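Spelled out (the image shows the same formula), the cosine similarity of two feature vectors $A$ and $B$ is:

```math
\mathrm{sim}(A,B)=\cos\theta=\frac{A\cdot B}{\lVert A\rVert\,\lVert B\rVert}=\frac{\sum_{i=1}^{n}A_iB_i}{\sqrt{\sum_{i=1}^{n}A_i^2}\,\sqrt{\sum_{i=1}^{n}B_i^2}}
```

This is exactly what the `consinSim` helper in `ContentRecommender.scala` computes with jblas: `movie1.dot(movie2) / (movie1.norm2() * movie2.norm2())`.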
## 4.3 Core code

## 4.3.1 Spark TF-IDF

```scala
// Core step: extract movie feature vectors from the content metadata with TF-IDF
// Create a tokenizer; it splits on whitespace by default
val tokenizer = new Tokenizer().setInputCol("genres").setOutputCol("words")

// Transform the raw data with the tokenizer, producing a new column "words"
val wordsData = tokenizer.transform(movieTagsDF)

// HashingTF turns a word sequence into term frequencies
val hashingTF = new HashingTF().setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(50)
val featurizedData = hashingTF.transform(wordsData)

// IDF: fit the model to get each term's inverse document frequency
val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features")
val idfModel = idf.fit(featurizedData)
// Apply the model: each document now carries TF-IDF weights as its feature vector
val rescaledData = idfModel.transform(featurizedData)

// (from the full source) convert the sparse TF-IDF vectors into jblas matrices
val movieFeatures = rescaledData.map {
  row => (row.getAs[Int]("mid"), row.getAs[SparseVector]("features").toArray)
}.rdd
  .map { x => (x._1, new DoubleMatrix(x._2)) }

// Pairwise similarity over all movies via a cartesian product
val movieRecs = movieFeatures.cartesian(movieFeatures)
  .filter {
    // drop self-pairs
    case (a, b) => a._1 != b._1
  }
  .map {
    case (a, b) =>
      val simScore = this.consinSim(a._2, b._2)
      (a._1, (b._1, simScore))
  }
  .filter(_._2._2 > 0.6) // keep only pairs with similarity above 0.6
  .groupByKey()
  .map {
    case (mid, items) => MovieRecs(mid, items.toList.sortWith(_._2 > _._2).map(x => Recommendation(x._1, x._2)))
  }
  .toDF()
```

## 4.3.2 Elasticsearch More Like This

```java
MoreLikeThisQueryBuilder query = QueryBuilders.moreLikeThisQuery(
        /*new String[]{"name", "descri", "genres", "actors", "directors", "tags"},*/
        new MoreLikeThisQueryBuilder.Item[]{new MoreLikeThisQueryBuilder.Item(Constant.ES_INDEX,
                Constant.ES_MOVIE_TYPE, String.valueOf(mid))});
```

## 5 Collaborative-filtering recommendation

## 5.1 User-based collaborative filtering (UserCF)

*Compute user-user similarity, then recommend what similar users liked*
![](https://upload-images.jianshu.io/upload_images/21415382-4bf944aa309aa852?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

## 5.2 Item-based collaborative filtering (ItemCF, the focus here)

*Compute item-item similarity, then recommend the most similar items (unlike content-based recommendation, the similarity comes from user behavior rather than item metadata)*
![](https://upload-images.jianshu.io/upload_images/21415382-c7cec43faa7254db?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

## 5.2.1 Core algorithm: co-occurrence similarity
![Co-occurrence similarity formula](https://upload-images.jianshu.io/upload_images/21415382-34cc04c19e2361da?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
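In symbols, with $N(i)$ denoting the set of users who rated item $i$, the co-occurrence similarity in the image is:

```math
w_{ij}=\frac{\lvert N(i)\cap N(j)\rvert}{\sqrt{\lvert N(i)\rvert\,\lvert N(j)\rvert}}
```

The numerator counts users who rated both items; the square root in the denominator damps the advantage of globally popular items. This is what `cooccurrenceSim` computes in `ItemCFRecommender.scala`: `coCount / math.sqrt(count1 * count2)`.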
## 5.2.2 Core algorithm example

```scala
// ( mid1, (mid2, score) )
val simDF = cooccurrenceDF.map{
  row =>
    val coocSim = cooccurrenceSim( row.getAs[Long]("cocount"), row.getAs[Long]("count1"),
      row.getAs[Long]("count2") )
    ( row.getInt(0), ( row.getInt(1), coocSim ) )
}
  .rdd
  .groupByKey()
  .map{
    case (mid, recs) =>
      MoviesRecs( mid, recs.toList.sortWith(_._2>_._2).take(MAX_RECOMMENDATION)
        .map(x=>Recommendation(x._1,x._2)) )
  }
  .toDF()
```

## 5.3 Latent factor model (LFM) recommendation

## 5.3.1 Idea
*Find hidden factors that link users and items*
![](https://upload-images.jianshu.io/upload_images/21415382-6640b7dbb9a26eca?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
## 5.3.2 Model formulas
![Implicit factorization](https://upload-images.jianshu.io/upload_images/21415382-5a091130396d255a?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
![Minimizing the loss function](https://upload-images.jianshu.io/upload_images/21415382-a87afceaf9624020.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
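In case the images fail to load: in the standard formulation (notation mine; the figures' exact symbols may differ), each user $u$ gets a latent vector $p_u$ and each movie $i$ a latent vector $q_i$, a predicted rating is their inner product, and ALS minimizes the regularized squared error over the set $K$ of observed ratings:

```math
\hat{r}_{ui}=p_u^{\top}q_i,\qquad \min_{P,Q}\sum_{(u,i)\in K}\left(r_{ui}-p_u^{\top}q_i\right)^2+\lambda\left(\lVert p_u\rVert^2+\lVert q_i\rVert^2\right)
```

Here $\lambda$ is the regularization weight, the `lambda` argument passed to `ALS.train` in the example below, and the latent dimension is `rank`.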
## 5.3.3 Core algorithm example

```scala
// Train the latent factor model: Rating(user: Int, product: Int, rating: Double)
val trainData = ratingRDD.map(x => Rating(x._1, x._2, x._3))
// assign several values in one statement
val (rank, iterations, lambda) = (200, 5, 0.1)
val model = ALS.train(trainData, rank, iterations, lambda)

// Collect all distinct uids and mids from the rating data
val userRDD = ratingRDD.map(_._1).distinct()
val movieRDD = ratingRDD.map(_._2).distinct()
val userMovies = userRDD.cartesian(movieRDD)

// Predict a score for every (user, movie) pair
val preRatings = model.predict(userMovies)

val userRecs = preRatings
  .filter(_.rating > 0)
  .map(rating => (rating.user, (rating.product, rating.rating))) // Rating -> (uid, (mid, score))
  .groupByKey()
  .map {
    case (uid, recs) => UserRecs(uid, recs.toList.sortWith(_._2 > _._2).take(USER_MAX_RECOMMENDATION).map(x => Recommendation(x._1, x._2)))
  }
  .toDF()
```
--------------------------------------------------------------------------------
/Recommendation/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 |
--------------------------------------------------------------------------------
/Recommendation/ContentRecommendation/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 3 | org.eclipse.jdt.core.compiler.compliance=1.8 4 | org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled 5 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 6 | org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore 7 | org.eclipse.jdt.core.compiler.release=disabled 8 | org.eclipse.jdt.core.compiler.source=1.8 9 |
--------------------------------------------------------------------------------
/Recommendation/ContentRecommendation/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 |
--------------------------------------------------------------------------------
/Recommendation/ContentRecommendation/pom.xml:
--------------------------------------------------------------------------------
1 | 2 | 5 | 6 | Recommendation 7 | com.z 8 | 1.0-SNAPSHOT 9 | 10 | 11 | 4.0.0 12 | ContentRecommendation 13 | 14 | 15 | 16 | org.scalanlp 17 | jblas 18 | ${jblas.version} 19 | 20 | 21 | 22 | 23 | org.apache.spark 24 | spark-core_2.11 25 | 26 | 27 | org.apache.spark 28 | spark-sql_2.11 29 | 30 | 31 | org.apache.spark 32 | spark-mllib_2.11 33 | 34 | 35 | 36 | 40 | 41 | 42 | org.mongodb 43 | casbah-core_2.11 44 | ${casbah.version} 45 | 46 | 47 | org.mongodb.spark 48 | mongo-spark-connector_2.11 49 | ${mongodb-spark.version} 50 | 51 | 52 | 53 | 54 | ContentRecommendation 55 | 56 | 57 | org.apache.maven.plugins 58 | maven-assembly-plugin 59 | 60 | 61 | 62 | com.z.content.ContentRecommender 63 | 64 | 65 | 66 | jar-with-dependencies 67 | 68 | 69 | 70 | 71 | 72 | 73 |
--------------------------------------------------------------------------------
/Recommendation/ContentRecommendation/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=info, stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n
--------------------------------------------------------------------------------
/Recommendation/ContentRecommendation/src/main/scala/com/z/content/ContentRecommender.scala:
--------------------------------------------------------------------------------
1 | package com.z.content 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.ml.feature.{HashingTF, IDF, Tokenizer} 5 | import org.apache.spark.ml.linalg.SparseVector 6 | import org.apache.spark.sql.SparkSession 7 | import org.jblas.DoubleMatrix 8 | 9 | // The data source needed here is the movie content metadata 10 | case class Movie(mid: Int, name: String, descri: String, timelong: String, issue: String, shoot: String, language: String, 11 | genres: String, actors: String, directors: String) 12 | 13 | case class MongoConfig(uri:String, db:String) 14 | 15 | // A basic recommendation object 16 | case class Recommendation( mid: Int, score: Double ) 17 | 18 | // Movie similarity list built from feature vectors extracted from movie content 19 | case class MovieRecs( mid: Int, recs: Seq[Recommendation] ) 20 | 21 | object ContentRecommender { 22 | 23 | // Table names and constants 24 | val MONGODB_MOVIE_COLLECTION = "Movie" 25 | 26 | val CONTENT_MOVIE_RECS = "ContentMovieRecs" 27 | 28 | def main(args: Array[String]): Unit = { 29 | val config = Map( 30 | "mongo.uri" -> "mongodb://192.168.0.241:27017/recommender", 31 | "mongo.db" -> "recommender" 32 | ) 33 | 34 | // Create a SparkConf 35 | val warehouseLocation : String = "hdfs://node1:9000/user/hive/warehouse" 36 | val sparkConf = new SparkConf() 37 | .setAppName("推荐系统 - 内容推荐") 38 | .setMaster("spark://node1:7077,node3:7077") 39 | .setJars(List("G:\\JavaEE\\Hadoop-Spark\\RecommendationSystem\\Recommendation\\ContentRecommendation\\target\\" + 40 | "ContentRecommendation-jar-with-dependencies.jar")) 41 | .setIfMissing("spark.driver.host", "192.168.0.28") 42 | .set("spark.num.executors", "3") 43 | .set("spark.executor.cores", "2") 44 | .set("spark.executor.memory", "1800m") 45 | .set("spark.sql.warehouse.dir", warehouseLocation) 46 | 47 | // Create a SparkSession 48 | val spark = SparkSession.builder().config(sparkConf).getOrCreate() 49 | 50 | import spark.implicits._ 51 | implicit val mongoConfig = MongoConfig(config("mongo.uri"), config("mongo.db")) 52 | 53 | // Load the data and preprocess it 54 | val movieTagsDF = spark.read 55 | .option("uri", mongoConfig.uri) 56 | .option("collection", MONGODB_MOVIE_COLLECTION) 57 | .format("com.mongodb.spark.sql") 58 | .load() 59 | .as[Movie] 60 | .map( 61 | // Take mid, name and genres as the raw content features; the tokenizer splits on whitespace by default 62 | x => ( x.mid, x.name, x.genres.map(c=> if(c=='|') ' ' else c) ) 63 | ) 64 | .toDF("mid", "name", "genres") 65 | .cache() 66 | 67 | // Core step: extract movie feature vectors from the content metadata with TF-IDF 68 | // Create a tokenizer; it splits on whitespace by default 69 | val tokenizer = new Tokenizer().setInputCol("genres").setOutputCol("words") 70 | 71 | // Transform the raw data with the tokenizer, producing a new column "words" 72 | val wordsData = tokenizer.transform(movieTagsDF) 73 | 74 | // HashingTF turns a word sequence into term frequencies 75 | val hashingTF = new HashingTF().setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(50) 76 | val featurizedData = hashingTF.transform(wordsData) 77 | 78 | // IDF tool: used to fit an IDF model 79 | val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features") 80 | // Fit the IDF model to get each term's inverse document frequency 81 | val idfModel = idf.fit(featurizedData) 82 | // Apply the model: each document gets its TF-IDF weights as a new feature vector 83 | val rescaledData = idfModel.transform(featurizedData) 84 | 85 | val movieFeatures = rescaledData.map { 86 | row => (row.getAs[Int]("mid"), row.getAs[SparseVector]("features").toArray) 87 | }.rdd 88 | .map { 89 | x => (x._1, new DoubleMatrix(x._2)) 90 | } 91 | 92 | // Compute pairwise similarities between all movies, starting from a cartesian product 93 | val movieRecs = movieFeatures.cartesian(movieFeatures) 94 | .filter{ 95 | // drop self-pairs 96 | case (a, b) => a._1 != b._1 97 | } 98 | .map{ 99 | case (a, b) => { 100 | val simScore = this.consinSim(a._2, b._2) 101 | ( a._1, ( b._1, simScore ) ) 102 | } 103 | } 104 | .filter(_._2._2 > 0.6) // keep only pairs with similarity above 0.6 105 | .groupByKey() 106 | .map{ 107 | case (mid, items) => MovieRecs( mid, items.toList.sortWith(_._2 > _._2).map(x => Recommendation(x._1, x._2)) ) 108 | } 109 | .toDF() 110 | 111 | movieRecs.write 112 | .option("uri", mongoConfig.uri) 113 | .option("collection", CONTENT_MOVIE_RECS) 114 | 
.mode("overwrite") 115 | .format("com.mongodb.spark.sql") 116 | .save() 117 | 118 | spark.stop() 119 | } 120 | 121 | // 求向量余弦相似度 122 | def consinSim(movie1: DoubleMatrix, movie2: DoubleMatrix):Double ={ 123 | movie1.dot(movie2) / ( movie1.norm2() * movie2.norm2() ) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /Recommendation/ItemCFRecommendation/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | Recommendation 7 | com.z 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | ItemCFRecommendation 13 | 14 | 15 | 16 | org.apache.spark 17 | spark-core_2.11 18 | 19 | 20 | org.apache.spark 21 | spark-sql_2.11 22 | 23 | 28 | 29 | 30 | 31 | org.mongodb 32 | casbah-core_2.11 33 | ${casbah.version} 34 | 35 | 36 | 37 | org.mongodb.spark 38 | mongo-spark-connector_2.11 39 | ${mongodb-spark.version} 40 | 41 | 42 | 43 | 44 | ItemCFRecommendation 45 | 46 | 47 | org.apache.maven.plugins 48 | maven-assembly-plugin 49 | 50 | 51 | 52 | com.z.itemcf.ItemCFRecommender 53 | 54 | 55 | 56 | jar-with-dependencies 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /Recommendation/ItemCFRecommendation/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=warn, stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n 5 | -------------------------------------------------------------------------------- /Recommendation/ItemCFRecommendation/src/main/scala/com/z/itemcf/ItemCFRecommender.scala: -------------------------------------------------------------------------------- 1 | package com.z.itemcf 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.sql.SparkSession 5 | 6 | case class MongoConfig( uri: String, db: String ) 7 | 8 | case class MovieRating( uid: Int, mid: Int, score: Double, timestamp: Int ) 9 | 10 | case class Recommendation( mid: Int, score: Double ) 11 | 12 | case class MoviesRecs( mid: Int, recs: Seq[Recommendation] ) 13 | 14 | object ItemCFRecommender { 15 | // 定义常量和表名 16 | val MONGODB_RATING_COLLECTION = "Rating" 17 | val ITEM_CF_MOVIE_RECS = "ItemCFMoviesRecs" 18 | val MAX_RECOMMENDATION = 10 19 | 20 | def main(args: Array[String]): Unit = { 21 | val config = Map( 22 | "mongo.uri" -> "mongodb://192.168.0.241:27017/recommender", 23 | "mongo.db" -> "recommender" 24 | ) 25 | // 创建一个sparkConf 26 | val warehouseLocation : String = "hdfs://node1:9000/user/hive/warehouse" 27 | val sparkConf = new SparkConf() 28 | .setAppName("推荐系统 - itemCF") 29 | .setMaster("spark://node1:7077,node3:7077") 30 | .setJars(List("G:\\JavaEE\\Hadoop-Spark\\RecommendationSystem\\Recommendation\\ItemCFRecommendation\\target\\" + 31 | "ItemCFRecommendation-jar-with-dependencies.jar")) 32 | .setIfMissing("spark.driver.host", "192.168.0.28") 33 | .set("spark.num.executors", "3") 34 | .set("spark.executor.cores", "2") 35 | .set("spark.executor.memory", "1800m") 36 | .set("spark.sql.warehouse.dir", warehouseLocation) 37 | 38 | // 创建spark session 39 | val spark = SparkSession.builder().config(sparkConf).getOrCreate() 40 | 41 | import spark.implicits._ 42 | implicit val mongoConfig = MongoConfig( config("mongo.uri"), config("mongo.db") ) 43 | 44 | // 加载数据,转换成DF进行处理 45 | val ratingDF = spark.read 
46 | .option("uri", mongoConfig.uri) 47 | .option("collection", MONGODB_RATING_COLLECTION) 48 | .format("com.mongodb.spark.sql") 49 | .load() 50 | .as[MovieRating] 51 | .map( 52 | x => ( x.uid, x.mid, x.score ) 53 | ) 54 | .toDF("uid", "mid", "score") 55 | .cache() 56 | 57 | val productRatingCountDF = ratingDF.groupBy("mid").count() //默认clos as:count 58 | val ratingWithCountDF = ratingDF.join(productRatingCountDF, "mid") 59 | 60 | // 核心算法: 61 | val joinedDF = ratingWithCountDF.join(ratingWithCountDF, "uid") // .where($"mid" != $"mid") 62 | .toDF("uid","mid1","score1","count1","mid2","score2","count2") 63 | .select("uid","mid1","count1","mid2","count2").where($"mid1" =!= $"mid2") 64 | 65 | joinedDF.createOrReplaceTempView("joined") 66 | // scala """ | stripMargin妙用.注意string.spilit("""|""") 67 | val cooccurrenceDF = spark.sql( 68 | """ 69 | |select mid1 70 | |, mid2 71 | |, count(uid) as cocount 72 | |, first(count1) as count1 73 | |, first(count2) as count2 74 | |from joined 75 | |group by mid1, mid2 76 | """.stripMargin 77 | ).cache() 78 | 79 | // ( mid1, (mid2, score) ) 80 | val simDF = cooccurrenceDF.map{ 81 | row => 82 | val coocSim = cooccurrenceSim( row.getAs[Long]("cocount"), row.getAs[Long]("count1"), 83 | row.getAs[Long]("count2") ) 84 | ( row.getInt(0), ( row.getInt(1), coocSim ) ) 85 | } 86 | .rdd 87 | .groupByKey() 88 | .map{ 89 | case (mid, recs) => 90 | MoviesRecs( mid, recs.toList.sortWith(_._2>_._2).take(MAX_RECOMMENDATION) 91 | .map(x=>Recommendation(x._1,x._2)) ) 92 | } 93 | .toDF() 94 | 95 | simDF.write 96 | .option("uri", mongoConfig.uri) 97 | .option("collection", ITEM_CF_MOVIE_RECS) 98 | .mode("overwrite") 99 | .format("com.mongodb.spark.sql") 100 | .save() 101 | 102 | spark.stop() 103 | } 104 | 105 | // 同现相似度计算公式 106 | def cooccurrenceSim(coCount: Long, count1: Long, count2: Long): Double ={ 107 | coCount / math.sqrt( count1 * count2 ) 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /Recommendation/KafkaStream/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java/com/z/kafkastream/Application.java=UTF-8 3 | encoding//src/main/java/com/z/kafkastream/LogProcessor.java=UTF-8 4 | encoding//src/main/java/com/z/kafkastream/MyEventTimeExtractor.java=UTF-8 5 | -------------------------------------------------------------------------------- /Recommendation/KafkaStream/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 3 | org.eclipse.jdt.core.compiler.compliance=1.8 4 | org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled 5 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 6 | org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore 7 | org.eclipse.jdt.core.compiler.release=disabled 8 | org.eclipse.jdt.core.compiler.source=1.8 9 | -------------------------------------------------------------------------------- /Recommendation/KafkaStream/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /Recommendation/KafkaStream/pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | Recommendation 7 | com.z 8 | 1.0-SNAPSHOT 9 | 10 | 11 | 4.0.0 12 | KafkaStream 13 | 14 | 15 | 16 | org.apache.kafka 17 | kafka-streams 18 | ${kafka.version} 19 | 20 | 21 | 22 | org.apache.kafka 23 | kafka-clients 24 | ${kafka.version} 25 | 26 | 27 | 28 | 29 | kafkastream 30 | 31 | 32 | org.apache.maven.plugins 33 | maven-assembly-plugin 34 | 35 | 36 | 37 | com.z.kafkastream.Application 38 | 39 | 40 | 41 | jar-with-dependencies 42 | 43 | 44 | 45 | 46 | make-assembly 47 | package 48 | 49 | single 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /Recommendation/KafkaStream/src/main/java/com/z/kafkastream/Application.java: -------------------------------------------------------------------------------- 1 | package com.z.kafkastream; 2 | 3 | import org.apache.kafka.streams.KafkaStreams; 4 | import org.apache.kafka.streams.StreamsConfig; 5 | import org.apache.kafka.streams.processor.TopologyBuilder; 6 | 7 | import java.util.Properties; 8 | 9 | /** 10 | * kafka stream实时流 11 | */ 12 | public class Application { 13 | public static void main(String[] args) { 14 | String brokers = "node1:9092,node2:9092,node3:9092"; 15 | // String zookeepers = "node1:2181,node2:2181,node3:2181"; 16 | 17 | // topic 18 | String from = "log"; 19 | String to = "recommender"; 20 | 21 | // kafka消费者配置 22 | Properties settings = new Properties(); 23 | settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "logFilter"); 24 | settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers); 25 | // flume端的kafka是0.8.11版本,兼容发送没有发送时间戳 26 | settings.put(StreamsConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MyEventTimeExtractor.class.getName()); 27 | // settings.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, zookeepers); 28 | 29 | // 创建 kafka stream 配置对象 30 | StreamsConfig config = new StreamsConfig(settings); 31 | 32 | // 创建一个拓扑建构器 33 | TopologyBuilder builder = new TopologyBuilder(); 34 | 35 | // 定义流处理的拓扑结构 36 | builder.addSource("SOURCE", from) 37 | .addProcessor("PROCESSOR", ()-> new LogProcessor(), "SOURCE") 38 | .addSink("SINK", to, "PROCESSOR"); 39 | 40 | KafkaStreams streams = new KafkaStreams( builder, config ); 41 | 42 | streams.start(); 43 | 44 | System.out.println("Kafka stream started!>>>>>>>>>>>"); 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /Recommendation/KafkaStream/src/main/java/com/z/kafkastream/LogProcessor.java: -------------------------------------------------------------------------------- 1 | package com.z.kafkastream; 2 | 3 | import org.apache.kafka.streams.processor.Processor; 4 | import org.apache.kafka.streams.processor.ProcessorContext; 5 | 6 | public class LogProcessor implements Processor { 7 | 8 | private ProcessorContext context; 9 | 10 | @Override 11 | public void init(ProcessorContext processorContext) { 12 | this.context = processorContext; 13 | } 14 | 15 | @Override 16 | public void process(byte[] dummy, byte[] line) { 17 | // 把收集到的日志信息用string表示 18 | String input = new String(line); 19 | 20 | // flume已经正则匹配:根据前缀MOVIE_RATING_PREFIX:从日志信息中提取评分数据 21 | if (input.contains("MOVIE_RATING_PREFIX:")) { 22 | input = input.split("MOVIE_RATING_PREFIX:")[1].trim(); 23 | System.out.println("评分数据:" + input); 24 | 25 | context.forward("logProcessor".getBytes(), input.getBytes()); 26 | } 27 | } 28 | 29 | @Override 30 | public void punctuate(long l) { 31 | 32 | } 33 | 34 | @Override 35 | public void 
close() { 36 | 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Recommendation/KafkaStream/src/main/java/com/z/kafkastream/MyEventTimeExtractor.java: -------------------------------------------------------------------------------- 1 | package com.z.kafkastream; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerRecord; 4 | import org.apache.kafka.streams.processor.TimestampExtractor; 5 | 6 | /** 7 | * kafka 0.10以下生产消息没有时间戳,flume使用kafka版本较低 8 | */ 9 | public class MyEventTimeExtractor implements TimestampExtractor{ 10 | 11 | @Override 12 | public long extract(ConsumerRecord record, long previousTimestamp) { 13 | return System.currentTimeMillis(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /Recommendation/KafkaStream/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=warn, stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n -------------------------------------------------------------------------------- /Recommendation/OfflineRecommendation/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 3 | org.eclipse.jdt.core.compiler.compliance=1.8 4 | org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled 5 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 6 | org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore 7 | org.eclipse.jdt.core.compiler.release=disabled 8 | org.eclipse.jdt.core.compiler.source=1.8 9 | -------------------------------------------------------------------------------- /Recommendation/OfflineRecommendation/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /Recommendation/OfflineRecommendation/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | Recommendation 7 | com.z 8 | 1.0-SNAPSHOT 9 | 10 | 11 | 4.0.0 12 | OfflineRecommender 13 | 14 | 15 | 16 | org.scalanlp 17 | jblas 18 | ${jblas.version} 19 | 20 | 21 | 22 | 23 | org.apache.spark 24 | spark-core_2.11 25 | 26 | 27 | org.apache.spark 28 | spark-sql_2.11 29 | 30 | 31 | org.apache.spark 32 | spark-mllib_2.11 33 | 34 | 35 | 36 | 40 | 41 | 42 | 43 | org.mongodb 44 | casbah-core_2.11 45 | ${casbah.version} 46 | 47 | 48 | org.mongodb.spark 49 | mongo-spark-connector_2.11 50 | ${mongodb-spark.version} 51 | 52 | 53 | 54 | 55 | OfflineRecommendation 56 | 57 | 58 | org.apache.maven.plugins 59 | maven-assembly-plugin 60 | 61 | 62 | 63 | com.z.offline.OfflineRecommender 64 | 65 | 66 | 67 | jar-with-dependencies 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /Recommendation/OfflineRecommendation/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=info, stdout 2 | 
log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n -------------------------------------------------------------------------------- /Recommendation/OfflineRecommendation/src/main/scala/com/z/offline/ALSTrainer.scala: -------------------------------------------------------------------------------- 1 | package com.z.offline 2 | 3 | import breeze.numerics.sqrt 4 | import com.z.offline.OfflineRecommender.MONGODB_RATING_COLLECTION 5 | import org.apache.spark.SparkConf 6 | import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating} 7 | import org.apache.spark.rdd.RDD 8 | import org.apache.spark.sql.SparkSession 9 | //import org.apache.spark.ml.recommendation.{ALS, MatrixFactorizationModel, Rating} 10 | 11 | /** 12 | * 隐语模型超参数调整优化 13 | */ 14 | object ALSTrainer { 15 | def main(args: Array[String]): Unit = { 16 | val config = Map( 17 | "mongo.uri" -> "mongodb://192.168.0.241:27017/recommender", 18 | "mongo.db" -> "recommender" 19 | ) 20 | // 创建一个sparkConf 21 | val warehouseLocation = "hdfs://node1:9000/user/hive/warehouse"; 22 | val sparkConf = new SparkConf() 23 | .setAppName("推荐系统 - LFM调参cache") 24 | .setMaster("spark://node1:7077,node3:7077") 25 | .setJars(List("G:\\JavaEE\\Hadoop-Spark\\MovieRecommendSystem\\recommender\\OfflineRecommender\\target\\" + 26 | "OfflineRecommender-jar-with-dependencies.jar")) 27 | .setIfMissing("spark.driver.host", "192.168.0.28") 28 | .set("spark.sql.warehouse.dir", warehouseLocation) 29 | .set("spark.num.executors", "3") 30 | .set("spark.executor.cores", "1") 31 | .set("spark.executor.memory", "1024m") 32 | 33 | // 创建一个SparkSession 34 | val spark = SparkSession.builder().config(sparkConf).getOrCreate() 35 | 36 | import spark.implicits._ 37 | implicit val mongoConfig = MongoConfig(config("mongo.uri"), config("mongo.db")) 38 | 39 | // 加载评分数据 40 | val ratingRDD = spark.read 41 | .option("uri", mongoConfig.uri) 42 | .option("collection", MONGODB_RATING_COLLECTION) 43 | .format("com.mongodb.spark.sql") 44 | .load() 45 | .as[MovieRating] 46 | .rdd 47 | .map(rating => Rating(rating.uid, rating.mid, rating.score)) 48 | // .cache() 49 | 50 | // 随机切分数据集,生成训练集和测试集 51 | val splits = ratingRDD.randomSplit(Array(0.8, 0.2)) 52 | val trainingRDD = splits(0) 53 | val testRDD = splits(1) 54 | 55 | // 模型参数选择,输出最优参数 56 | adjustALSParam(trainingRDD, testRDD) 57 | 58 | spark.close() 59 | } 60 | 61 | /** 62 | * LFM迭代调参 63 | * @param trainData 64 | * @param testData 65 | */ 66 | def adjustALSParam(trainData: RDD[Rating], testData: RDD[Rating]): Unit = { 67 | val result = for (rank <- Array(50, 100, 200, 300); lambda <- Array(0.01, 0.1, 1)) 68 | yield { 69 | val model = ALS.train(trainData, rank, 5, lambda) 70 | val rmse = getRMSE(model, testData) 71 | (rank, lambda, rmse) 72 | } 73 | 74 | // 控制台打印输出最优参数 75 | println(result.minBy(_._3)) 76 | } 77 | 78 | /** 79 | * 均方误差的根 80 | * @param model 81 | * @param data 82 | * @return 83 | */ 84 | def getRMSE(model: MatrixFactorizationModel, data: RDD[Rating]): Double = { 85 | // 计算预测评分 86 | val userProducts = data.map(item => (item.user, item.product)) 87 | val predictRating = model.predict(userProducts) 88 | 89 | // 以uid,mid作为外键,inner join实际观测值和预测值 90 | val actual = data.map(item => ((item.user, item.product), item.rating)) 91 | val predict = predictRating.map(item => ((item.user, item.product), item.rating)) 92 | 93 | // 内连接得到(uid, 
mid),(actual, predict) 94 | sqrt( 95 | actual.join(predict).map { 96 | case ((uid, mid), (actual, pre)) => val err = actual - pre; err * err 97 | }.mean() 98 | ) 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /Recommendation/OfflineRecommendation/src/main/scala/com/z/offline/OfflineRecommender.scala: -------------------------------------------------------------------------------- 1 | package com.z.offline 2 | 3 | /** 4 | * 离线推荐算法: 5 | * 根据用户推荐电影列表 6 | * 电影相似度矩阵列表 7 | */ 8 | 9 | import org.apache.spark.SparkConf 10 | import org.apache.spark.mllib.recommendation.{ALS, Rating} 11 | import org.apache.spark.sql.{DataFrame, SparkSession} 12 | import org.jblas.DoubleMatrix 13 | 14 | // Rate表:与ALS算法中的Rating取别开 15 | case class MovieRating(uid: Int, mid: Int, score: Double, timestamp: Int) 16 | 17 | // 定义一个基准推荐对象 18 | case class Recommendation(mid: Int, score: Double) 19 | 20 | // 定义基于预测评分的用户推荐列表 21 | case class UserRecs(uid: Int, recs: Seq[Recommendation]) 22 | 23 | // 定义基于LFM电影特征向量的电影相似度列表 24 | case class MovieRecs(mid: Int, recs: Seq[Recommendation]) 25 | 26 | case class MongoConfig(uri: String, db: String) 27 | 28 | object OfflineRecommender { 29 | 30 | // 定义表名和常量 31 | val MONGODB_RATING_COLLECTION = "Rating" 32 | val USER_RECS = "UserRecs" 33 | val MOVIE_RECS = "MovieRecs" 34 | val USER_MAX_RECOMMENDATION = 20 35 | 36 | def main(args: Array[String]): Unit = { 37 | val config = Map( 38 | "mongo.uri" -> "mongodb://192.168.0.241:27017/recommender", 39 | "mongo.db" -> "recommender" 40 | ) 41 | 42 | // 创建一个sparkConf 43 | val warehouseLocation = "hdfs://node1:9000/user/hive/warehouse"; 44 | val sparkConf = new SparkConf() 45 | .setAppName("推荐系统 - 离线推荐") 46 | .setMaster("spark://node1:7077,node3:7077") 47 | .setJars(List("G:\\JavaEE\\Hadoop-Spark\\RecommendationSystem\\Recommendation\\OfflineRecommendation\\target\\" + 48 | "OfflineRecommendation-jar-with-dependencies.jar")) 49 | .setIfMissing("spark.driver.host", "192.168.0.28") 50 | .set("spark.sql.warehouse.dir", warehouseLocation) 51 | .set("spark.num.executors", "3") 52 | .set("spark.executor.cores", "1") 53 | .set("spark.executor.memory", "1800m") 54 | 55 | // 创建一个SparkSession 56 | val spark = SparkSession.builder().config(sparkConf).getOrCreate() 57 | 58 | import spark.implicits._ 59 | implicit val mongoConfig = MongoConfig(config("mongo.uri"), config("mongo.db")) 60 | 61 | // 加载评分数据 62 | val ratingRDD = spark.read 63 | .option("uri", mongoConfig.uri) 64 | .option("collection", MONGODB_RATING_COLLECTION) 65 | .format("com.mongodb.spark.sql") 66 | .load() 67 | .as[MovieRating] 68 | .rdd 69 | .map(rating => (rating.uid, rating.mid, rating.score)) 70 | .cache() // 多次计算缓存到内存中 71 | 72 | // 训练隐语义模型:Rating(user:Int, product:Int, rating:Double) 73 | val trainData = ratingRDD.map(x => Rating(x._1, x._2, x._3)) 74 | // 多个变量赋值 75 | val (rank, iterations, lambda) = (200, 5, 0.1) 76 | val model = ALS.train(trainData, rank, iterations, lambda) 77 | 78 | // 从rating数据中提取所有的uid和mid,并去重 79 | val userRDD = ratingRDD.map(_._1).distinct() 80 | val movieRDD = ratingRDD.map(_._2).distinct() 81 | val userMovies = userRDD.cartesian(movieRDD) 82 | 83 | // 调用model的predict方法预测评分 84 | val preRatings = model.predict(userMovies) 85 | 86 | val userRecs = preRatings 87 | .filter(_.rating > 0) 88 | .map(rating => (rating.user, (rating.product, rating.rating))) // Rating->(uid, (mid, score)) 89 | .groupByKey() 90 | .map { 91 | case (uid, recs) => UserRecs(uid, recs.toList.sortWith(_._2 > 
_._2).take(USER_MAX_RECOMMENDATION).map(x => Recommendation(x._1, x._2))) 92 | } 93 | .toDF() 94 | 95 | storeDFInMongoDB(userRecs, USER_RECS) 96 | 97 | /** 98 | * vector: 99 | * local vector是一种索引是0开始的整数、内容为double类型,存储在单机上的向量。MLlib支持两种矩阵,dense密集型和sparse稀疏型。 100 | * 一个dense类型的向量背后其实就是一个数组,而sparse向量背后则是两个并行数组——索引数组和值数组。比如向量(1.0, 0.0, 3.0) 101 | * 既可以用密集型向量表示为[1.0, 0.0, 3.0],也可以用稀疏型向量表示为(3, [0,2],[1.0,3.0]),其中3是数组的大小。 102 | * 103 | * dense vector与sparse vector: 104 | * new DenseVector(this.toArray) 105 | * 创建dense vector 106 | * val dv: Vector = Vectors.dense(1.0, 0.0, 3.0) 107 | * 创建sparse vector 108 | * val sv1: Vector = Vectors.sparse(3, Array(0,2), Array(1.0,3.0)) 109 | * val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2,3.0))) 110 | * 111 | * vecor norm范数和sqdist距离: 112 | * val norm1Vec = Vectors.dense(1.0,-1.0,2.0) 113 | * // 第一范数,就是绝对值相加 114 | * println(Vectors.norm(norm1Vec,1)) // 4.0 115 | * // 第二番薯,就是平方和开根号 116 | * println(Vectors.norm(norm1Vec,2)) // 2.449489742783178 117 | * // 无限范数 118 | * println(Vectors.norm(norm1Vec,1000)) //2.0 119 | * 120 | * val sq1 = Vectors.dense(1.0, 2.0, 3.0) 121 | * val sq2 = Vectors.dense(2.0, 4.0, 6.0) 122 | * println(Vectors.sqdist(sq1, sq2)) // (2-1)^2 + (4-2)^2 + (6-3)^2 = 14 123 | * 124 | * labeled point: 125 | * 这种labeled point其实内部也是一个vector,可能是dense也可能是sparse,不过多了一个标签列。在ML里面,labeled point 126 | * 通常用于有监督算法。这个label是double类型的,这样既可以用于回归算法,也可以用于分类。在二分类中,Label不是0就是1; 127 | * 在多分类中label可能从0开始,1,2,3,4.... 128 | * val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)) 129 | * val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))) 130 | * label index1:value1 index2:value2 ... 131 | * val examples: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") 132 | **/ 133 | val movieFeatures = model.productFeatures.map { 134 | 135 | case (mid, features) => (mid, new DoubleMatrix(features)) 136 | } 137 | 138 | val movieRecs = movieFeatures.cartesian(movieFeatures) 139 | .filter { 140 | case (a, b) => a._1 != b._1 141 | } 142 | .map{ 143 | case (a, b) => val simScore = this.consinSim(a._2, b._2);(a._1, (b._1, simScore)) 144 | } 145 | .filter(_._2._2 > 0.6) // 过滤出相似度大于0.6的 146 | .groupByKey() 147 | .map {case (mid, items) => MovieRecs(mid, items.toList.sortWith(_._2 > _._2).map(x => Recommendation(x._1, x._2)))} 148 | .toDF() 149 | 150 | storeDFInMongoDB(movieRecs, MOVIE_RECS) 151 | 152 | spark.stop() 153 | } 154 | 155 | /** 156 | * 求向量余弦相似度:矩阵内积/第二范数乘积 157 | * 皮尔逊相关系数:先对向量每一分量减去分量均值,再求余弦相似度(叫取中心化) 158 | * @param movie1 159 | * @param movie2 160 | * @return 161 | */ 162 | def consinSim(movie1: DoubleMatrix, movie2: DoubleMatrix): Double = { 163 | movie1.dot(movie2) / (movie1.norm2() * movie2.norm2()) 164 | } 165 | 166 | def storeDFInMongoDB(df: DataFrame, collection_name: String)(implicit mongoConfig: MongoConfig): Unit ={ 167 | df.write 168 | .option("uri", mongoConfig.uri) 169 | .option("collection", collection_name) 170 | .mode("overwrite") 171 | .format("com.mongodb.spark.sql") 172 | .save() 173 | } 174 | 175 | } 176 | -------------------------------------------------------------------------------- /Recommendation/StatisticsRecommendation/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 3 | org.eclipse.jdt.core.compiler.compliance=1.8 4 | org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled 5 | 
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 6 | org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore 7 | org.eclipse.jdt.core.compiler.release=disabled 8 | org.eclipse.jdt.core.compiler.source=1.8 9 | -------------------------------------------------------------------------------- /Recommendation/StatisticsRecommendation/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /Recommendation/StatisticsRecommendation/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | Recommendation 7 | com.z 8 | 1.0-SNAPSHOT 9 | 10 | 11 | 4.0.0 12 | StatisticsRecommender 13 | 14 | 15 | 16 | 17 | org.apache.spark 18 | spark-core_2.11 19 | 20 | 21 | org.apache.spark 22 | spark-sql_2.11 23 | 24 | 29 | 30 | 31 | 32 | org.mongodb 33 | casbah-core_2.11 34 | ${casbah.version} 35 | 36 | 37 | org.mongodb.spark 38 | mongo-spark-connector_2.11 39 | ${mongodb-spark.version} 40 | 41 | 42 | 43 | 44 | StatisticsRecommendation 45 | 46 | 47 | 48 | org.apache.maven.plugins 49 | maven-assembly-plugin 50 | 51 | 52 | 53 | com.z.statistics.StatisticsRecommender 54 | 55 | 56 | 57 | jar-with-dependencies 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /Recommendation/StatisticsRecommendation/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=info, stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n -------------------------------------------------------------------------------- /Recommendation/StatisticsRecommendation/src/main/scala/com/z/statistics/StatisticsRecommender.scala: -------------------------------------------------------------------------------- 1 | package com.z.statistics 2 | /** 3 | * 离线统计。统计的功能: 4 | * 电影的评分次数统计:mid,count 5 | * 按月维度评分排行榜:电影每月评分次数并做时间倒序、评分次数倒序 6 | * 统计电影的平均评分:mid,avg 7 | * 各类别电影评分Top10统计 8 | */ 9 | import java.text.SimpleDateFormat 10 | import java.util.Date 11 | import org.apache.spark.SparkConf 12 | import org.apache.spark.sql.{DataFrame, SparkSession} 13 | /** 14 | * Movie 数据集 15 | * 260 电影ID,mid 16 | * Star Wars: Episode IV - A New Hope (1977) 电影名称,name 17 | * Princess Leia is captured and held hostage 详情描述,descri 18 | * 121 minutes 时长,timelong 19 | * September 21, 2004 发行时间,issue 20 | * 1977 拍摄时间,shoot 21 | * English 语言,language 22 | * Action|Adventure|Sci-Fi 类型,genres 23 | * Mark Hamill|Harrison Ford|Carrie Fisher 演员表,actors 24 | * George Lucas 导演,directors 25 | */ 26 | case class Movie(mid: Int, name: String, descri: String, timelong: String, issue: String, shoot: String, language: String, 27 | genres: String, actors: String, directors: String) 28 | 29 | /** 30 | * Rate 电影评分数据集 31 | * @param uid 用户id 32 | * @param mid 电影id 33 | * @param score 评分 34 | * @param timestamp 评分时间戳 35 | */ 36 | case class Rating(uid: Int, mid: Int, score: Double, timestamp: Int ) 37 | 38 | // 定义一个基准推荐对象 39 | case class Recommendation( mid: Int, score: Double ) 40 | 41 | // 定义电影类别top10推荐对象 42 | case class GenresRecommendation(genres: 
String, recs: Seq[Recommendation]) 43 | 44 | case class MongoConfig(uri:String, db:String) 45 | 46 | object StatisticsRecommender { 47 | 48 | // 定义表名 49 | val MONGODB_MOVIE_COLLECTION = "Movie" 50 | val MONGODB_RATING_COLLECTION = "Rating" 51 | 52 | //统计的表的名称 53 | val RATE_MORE_MOVIES = "RateMoreMovies" 54 | val RATE_MORE_RECENTLY_MOVIES = "RateMoreRecentlyMovies" 55 | val AVERAGE_MOVIES = "AverageMovies" 56 | val GENRES_TOP_MOVIES = "GenresTopMovies" 57 | 58 | def main(args: Array[String]): Unit = { 59 | val config = Map( 60 | "mongo.uri" -> "mongodb://192.168.0.241:27017/recommender", 61 | "mongo.db" -> "recommender" 62 | ) 63 | 64 | // 创建一个sparkConf 65 | val warehouseLocation = "hdfs://node1:9000/user/hive/warehouse"; 66 | val sparkConf = new SparkConf() 67 | .setAppName("推荐系统 - 离线统计") 68 | .setMaster("spark://node1:7077,node3:7077") 69 | .setJars(List("G:\\JavaEE\\Hadoop-Spark\\RecommendationSystem\\Recommendation\\StatisticsRecommendation\\target\\" + 70 | "StatisticsRecommendation-jar-with-dependencies.jar")) 71 | .setIfMissing("spark.driver.host", "192.168.0.28") 72 | .set("spark.sql.warehouse.dir", warehouseLocation) 73 | .set("spark.num.executors", "3") 74 | .set("spark.executor.cores", "2") 75 | .set("spark.executor.memory", "1024m") 76 | 77 | // 创建一个SparkSession 78 | val spark = SparkSession.builder().config(sparkConf).getOrCreate() 79 | 80 | import spark.implicits._ 81 | implicit val mongoConfig = MongoConfig(config("mongo.uri"), config("mongo.db")) 82 | 83 | // 从mongodb加载数据 84 | val ratingDF = spark.read 85 | .option("uri", mongoConfig.uri) 86 | .option("collection", MONGODB_RATING_COLLECTION) 87 | .format("com.mongodb.spark.sql") 88 | .load() 89 | .as[Rating] // 装换成dataset强类型 90 | .toDF() 91 | 92 | val movieDF = spark.read 93 | .option("uri", mongoConfig.uri) 94 | .option("collection", MONGODB_MOVIE_COLLECTION) 95 | .format("com.mongodb.spark.sql") 96 | .load() 97 | .as[Movie] // 装换成dataset强类型 98 | .toDF() 99 | 100 | // ratings评分表copy到内存中 101 | ratingDF.createOrReplaceTempView("ratings") 102 | 103 | // 电影的评分次数统计:mid,count 104 | val rateMoreMoviesDF = spark.sql("select mid, count(mid) as count from ratings group by mid") 105 | storeDFInMongoDB(rateMoreMoviesDF, RATE_MORE_MOVIES ) 106 | 107 | // 按月维度评分排行榜:电影每月评分次数并做时间倒序、评分次数倒序 108 | val simpleDateFormat = new SimpleDateFormat("yyyyMM") 109 | spark.udf.register("changeDate", (x: Int) => simpleDateFormat.format(new Date(x * 1000L)).toInt) 110 | val ratingOfYearMonth = spark.sql("select mid, score, changeDate(timestamp) as yearmonth from ratings") 111 | ratingOfYearMonth.createOrReplaceTempView("ratingOfMonth") 112 | val rateMoreRecentlyMoviesDF = spark.sql("select mid, count(mid) as count, yearmonth from ratingOfMonth" + 113 | " group by yearmonth, mid order by yearmonth desc, count desc") 114 | storeDFInMongoDB(rateMoreRecentlyMoviesDF, RATE_MORE_RECENTLY_MOVIES) 115 | 116 | // 统计电影的平均评分:mid,avg 117 | val averageMoviesDF = spark.sql("select mid, avg(score) as avg from ratings group by mid") 118 | storeDFInMongoDB(averageMoviesDF, AVERAGE_MOVIES) 119 | 120 | // 类别下热门电影榜:对比hive与rdd实现 121 | /** 122 | * select * from ( 123 | * select mid, score, genres_name from movie_with_score lateral view explode(genres) table_tmp as genres_name 124 | * ) t row_number over(partition by genres_name order by score desc) rank 125 | * where rank <=10 126 | */ 127 | val genres = List("Action","Adventure","Animation","Comedy","Crime","Documentary","Drama","Family","Fantasy","Foreign", 128 | "History","Horror","Music","Mystery" 
,"Romance","Science","Tv","Thriller","War","Western") 129 | val movieWithScore = movieDF.join(averageMoviesDF, "mid") 130 | // movieWithScore.agg($"avg".as("score")) 131 | val genresRDD = spark.sparkContext.makeRDD(genres) 132 | 133 | // DataFrame->RDD, 内容是Row 134 | val genresTopMoviesDF = genresRDD.cartesian(movieWithScore.rdd) 135 | .filter{case (genre, movieRow) => 136 | movieRow.getAs[String]("genres").toLowerCase.contains(genre.toLowerCase) 137 | } 138 | .map{ 139 | case (genre, movieRow) => (genre, (movieRow.getAs[Int]("mid"), movieRow.getAs[Double]("avg"))) 140 | } 141 | .groupByKey() 142 | .map{case (genre, items) => 143 | GenresRecommendation(genre, items.toList.sortWith(_._2>_._2).take(10).map(item=> Recommendation(item._1, item._2))) 144 | } 145 | .toDF() 146 | 147 | storeDFInMongoDB(genresTopMoviesDF, GENRES_TOP_MOVIES) 148 | 149 | spark.stop() 150 | } 151 | 152 | def storeDFInMongoDB(df: DataFrame, collection_name: String)(implicit mongoConfig: MongoConfig): Unit ={ 153 | df.write 154 | .option("uri", mongoConfig.uri) 155 | .option("collection", collection_name) 156 | .mode("overwrite") 157 | .format("com.mongodb.spark.sql") 158 | .save() 159 | } 160 | 161 | } 162 | -------------------------------------------------------------------------------- /Recommendation/StreamingRecommendation/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 3 | org.eclipse.jdt.core.compiler.compliance=1.8 4 | org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled 5 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 6 | org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore 7 | org.eclipse.jdt.core.compiler.release=disabled 8 | org.eclipse.jdt.core.compiler.source=1.8 9 | -------------------------------------------------------------------------------- /Recommendation/StreamingRecommendation/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /Recommendation/StreamingRecommendation/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | Recommendation 7 | com.z 8 | 1.0-SNAPSHOT 9 | 10 | 11 | 4.0.0 12 | StreamingRecommendation 13 | 14 | 15 | 16 | 17 | org.apache.spark 18 | spark-core_2.11 19 | 20 | 21 | org.apache.spark 22 | spark-sql_2.11 23 | 24 | 25 | org.apache.spark 26 | spark-streaming_2.11 27 | 28 | 29 | 30 | 34 | 35 | 36 | 37 | 38 | org.mongodb 39 | casbah-core_2.11 40 | ${casbah.version} 41 | 42 | 43 | 44 | org.mongodb.spark 45 | mongo-spark-connector_2.11 46 | ${mongodb-spark.version} 47 | 48 | 49 | 50 | 51 | redis.clients 52 | jedis 53 | ${redis.version} 54 | 55 | 56 | 57 | 58 | org.apache.kafka 59 | kafka-clients 60 | ${kafka.version} 61 | 62 | 63 | org.apache.spark 64 | spark-streaming-kafka-0-10_2.11 65 | ${spark.version} 66 | 67 | 68 | 69 | 70 | StreamingRecommendation 71 | 72 | 73 | org.apache.maven.plugins 74 | maven-assembly-plugin 75 | 76 | 77 | 78 | com.z.streaming.StreamingRecommender 79 | 80 | 81 | 82 | jar-with-dependencies 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- 
/Recommendation/StreamingRecommendation/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=warn, stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n -------------------------------------------------------------------------------- /Recommendation/StreamingRecommendation/src/main/scala/com/z/streaming/StreamingRecommender.scala: -------------------------------------------------------------------------------- 1 | package com.z.streaming 2 | 3 | import com.mongodb.casbah.commons.MongoDBObject 4 | import com.mongodb.casbah.{MongoClient, MongoClientURI} 5 | import org.apache.kafka.common.serialization.StringDeserializer 6 | import org.apache.spark.SparkConf 7 | import org.apache.spark.sql.SparkSession 8 | import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies} 9 | import org.apache.spark.streaming.{Seconds, StreamingContext} 10 | import redis.clients.jedis.Jedis 11 | 12 | // 定义连接助手对象,序列化 13 | object ConnHelper extends Serializable { 14 | lazy val jedis = new Jedis("192.168.0.241") 15 | lazy val mongoClient = MongoClient(MongoClientURI("mongodb://192.168.0.241:27017/recommender")) 16 | } 17 | 18 | case class MongoConfig(uri: String, db: String) 19 | 20 | // 定义一个基准推荐对象 21 | case class Recommendation(mid: Int, score: Double) 22 | 23 | // 定义基于预测评分的用户推荐列表 24 | case class UserRecs(uid: Int, recs: Seq[Recommendation]) 25 | 26 | // 定义基于LFM电影特征向量的电影相似度列表 27 | case class MovieRecs(mid: Int, recs: Seq[Recommendation]) 28 | 29 | object StreamingRecommender { 30 | 31 | val MAX_USER_RATINGS_NUM = 20 32 | val MAX_SIM_MOVIES_NUM = 20 33 | val MONGODB_STREAM_RECS_COLLECTION = "StreamRecs" 34 | val MONGODB_RATING_COLLECTION = "Rating" 35 | val MONGODB_MOVIE_RECS_COLLECTION = "MovieRecs" 36 | 37 | def main(args: Array[String]): Unit = { 38 | val config = Map( 39 | "mongo.uri" -> "mongodb://192.168.0.241:27017/recommender", 40 | "mongo.db" -> "recommender", 41 | "kafka.topic" -> "recommender" 42 | ) 43 | 44 | // 创建一个sparkConf 45 | val warehouseLocation : String = "hdfs://node1:9000/user/hive/warehouse" 46 | val sparkConf = new SparkConf() 47 | .setAppName("推荐系统 - 实时推荐") 48 | .setMaster("spark://node1:7077,node3:7077") 49 | .setJars(List("G:\\JavaEE\\Hadoop-Spark\\RecommendationSystem\\Recommendation\\StreamingRecommendation\\target\\" + 50 | "StreamingRecommendation-jar-with-dependencies.jar")) 51 | .setIfMissing("spark.driver.host", "192.168.0.28") 52 | .set("spark.num.executors", "3") 53 | .set("spark.executor.cores", "2") 54 | .set("spark.executor.memory", "1800m") 55 | .set("spark.sql.warehouse.dir", warehouseLocation) 56 | 57 | // 创建一个SparkSession 58 | val spark = SparkSession.builder().config(sparkConf).getOrCreate() 59 | 60 | // 拿到streaming context 61 | val sc = spark.sparkContext 62 | val ssc = new StreamingContext(sc, Seconds(2)) // batch duration 63 | 64 | import spark.implicits._ 65 | implicit val mongoConfig = MongoConfig(config("mongo.uri"), config("mongo.db")) 66 | 67 | // 加载电影相似度矩阵数据,把它广播出去 68 | val simMovieMatrix = spark.read 69 | .option("uri", mongoConfig.uri) 70 | .option("collection", MONGODB_MOVIE_RECS_COLLECTION) 71 | .format("com.mongodb.spark.sql") 72 | .load() 73 | .as[MovieRecs] 74 | .rdd 75 | .map{ movieRecs => // 为了查询相似度方便,转换成map 76 | (movieRecs.mid, movieRecs.recs.map( 
x=> (x.mid, x.score) ).toMap ) 77 | }.collectAsMap() 78 | 79 | val simMovieMatrixBroadCast = sc.broadcast(simMovieMatrix) 80 | 81 | // 定义kafka连接参数: 82 | val kafkaParam = Map( 83 | "bootstrap.servers" -> "node1:9092,node2:9092,node3:9092", 84 | "key.deserializer" -> classOf[StringDeserializer], 85 | "value.deserializer" -> classOf[StringDeserializer], 86 | "auto.offset.reset" -> "latest", 87 | "group.id" -> "recommender" 88 | // "enable.auto.commit" -> false, 89 | // "receive.buffer.bytes" -> 65536 90 | ) 91 | // 通过kafka创建一个DStream 92 | val kafkaStream = KafkaUtils.createDirectStream[String, String](ssc, LocationStrategies.PreferConsistent, 93 | ConsumerStrategies.Subscribe[String, String](Array(config("kafka.topic")), kafkaParam) 94 | ) 95 | 96 | // 把原始数据UID|MID|SCORE|TIMESTAMP 转换成评分流 97 | val ratingStream = kafkaStream.map { 98 | msg => 99 | // """|""":scala中"""三个引号内可以直接敲回车替代\n,stripMargin取空格链接字符默认是| 100 | val attr = msg.value().split("\\|") 101 | (attr(0).toInt, attr(1).toInt, attr(2).toDouble, attr(3).toInt) 102 | } 103 | 104 | ratingStream.print() 105 | 106 | ratingStream.foreachRDD { 107 | rdd => rdd.foreach { 108 | case (uid, mid, score, timestamp) => { 109 | // 从redis里获取用户最近的K次评分:Array[(mid, score)] 110 | val userRecentlyRatings = getUserRecentlyRating(MAX_USER_RATINGS_NUM, uid, ConnHelper.jedis) 111 | 112 | // 从相似度矩阵中获取备选列表,Array[mid] 113 | val candidateMovies = getTopSimMovies(MAX_SIM_MOVIES_NUM, mid, uid, simMovieMatrixBroadCast.value) 114 | 115 | // 计算备选元素与用户最近评分物品相似度+加强减弱因子,Array[(mid, score)] 116 | val streamRecs = computeMovieScores(candidateMovies, userRecentlyRatings, simMovieMatrixBroadCast.value) 117 | 118 | // 数据保存到mongodb 119 | saveDataToMongoDB(uid, streamRecs) 120 | } 121 | } 122 | } 123 | // 开始接收和处理数据 124 | ssc.start() 125 | 126 | println(">>>>>>>>>>>>>>> streaming started!") 127 | 128 | ssc.awaitTermination() 129 | 130 | } 131 | 132 | /** 133 | * 从redis获取用户最近k次评分 134 | * @param num 数量 135 | * @param uid 用户id 136 | * @param jedis 137 | * @return 138 | */ 139 | def getUserRecentlyRating(num: Int, uid: Int, jedis: Jedis): Array[(Int, Double)] = { 140 | // java list to scala.BufferList 141 | import scala.collection.JavaConversions._ 142 | // key{uid:UID}, value{MID:SCORE} 143 | jedis.lrange("uid:" + uid, 0, num - 1) 144 | .map { 145 | item => 146 | val attr = item.split("\\:") 147 | (attr(0).trim.toInt, attr(1).trim.toDouble) 148 | } 149 | .toArray 150 | } 151 | 152 | /** 153 | * 相似从相似度矩阵中获取备选列表:过滤已评分的 154 | * @param num 相似电影的数量 155 | * @param mid 当前电影ID 156 | * @param uid 当前评分用户ID 157 | * @param simMovies 相似度矩阵 158 | * @return 159 | */ 160 | def getTopSimMovies(num: Int, mid: Int, uid: Int, simMovies: scala.collection.Map[Int, scala.collection.immutable.Map[Int, Double]]) 161 | (implicit mongoConfig: MongoConfig): Array[Int] = { 162 | val allSimMovies = simMovies(mid).toArray 163 | 164 | val ratingExist = ConnHelper.mongoClient(mongoConfig.db)(MONGODB_RATING_COLLECTION) 165 | .find(MongoDBObject("uid" -> uid)) 166 | .map { 167 | item => item.get("mid").toString.toInt 168 | }.toArray 169 | 170 | allSimMovies.filter(x => !ratingExist.contains(x._1)) 171 | .sortWith(_._2 > _._2) 172 | .take(num) 173 | .map(x => x._1) 174 | } 175 | 176 | /** 177 | * 计算备选元素与用户最近评分物品相似度+加强减弱因子:核心算法 178 | * @param candidateMovies 179 | * @param userRecentlyRatings 180 | * @param simMovies 181 | * @return 182 | */ 183 | def computeMovieScores(candidateMovies: Array[Int], userRecentlyRatings: Array[(Int, Double)], 184 | simMovies: scala.collection.Map[Int, scala.collection.immutable.Map[Int, 

  /**
   * Get candidate movies from the similarity matrix, filtering out movies the user has already rated
   * @param num       number of similar movies to keep
   * @param mid       current movie ID
   * @param uid       current user ID
   * @param simMovies the similarity matrix
   * @return
   */
  def getTopSimMovies(num: Int, mid: Int, uid: Int, simMovies: scala.collection.Map[Int, scala.collection.immutable.Map[Int, Double]])
                     (implicit mongoConfig: MongoConfig): Array[Int] = {
    val allSimMovies = simMovies(mid).toArray

    val ratingExist = ConnHelper.mongoClient(mongoConfig.db)(MONGODB_RATING_COLLECTION)
      .find(MongoDBObject("uid" -> uid))
      .map {
        item => item.get("mid").toString.toInt
      }.toArray

    allSimMovies.filter(x => !ratingExist.contains(x._1))
      .sortWith(_._2 > _._2)
      .take(num)
      .map(x => x._1)
  }

  /**
   * Score each candidate against the user's recent ratings, with boost/penalty factors: the core algorithm
   * @param candidateMovies
   * @param userRecentlyRatings
   * @param simMovies
   * @return
   */
  def computeMovieScores(candidateMovies: Array[Int], userRecentlyRatings: Array[(Int, Double)],
                         simMovies: scala.collection.Map[Int, scala.collection.immutable.Map[Int, Double]]): Array[(Int, Double)] = {
    val scores = scala.collection.mutable.ArrayBuffer[(Int, Double)]()
    // boost / penalty counters
    val increMap = scala.collection.mutable.HashMap[Int, Int]()
    val decreMap = scala.collection.mutable.HashMap[Int, Int]()

    for (candidateMovie <- candidateMovies; userRecentlyRating <- userRecentlyRatings) {
      val simScore = getMoviesSimScore(candidateMovie, userRecentlyRating._1, simMovies)

      if (simScore > 0.7) {
        scores += ((candidateMovie, simScore * userRecentlyRating._2))
        if (userRecentlyRating._2 > 3) {
          increMap(candidateMovie) = increMap.getOrElse(candidateMovie, 0) + 1
        } else {
          decreMap(candidateMovie) = decreMap.getOrElse(candidateMovie, 0) + 1
        }
      }
    }

    scores.groupBy(_._1).map {
      case (mid, scoreList) =>
        (mid, scoreList.map(_._2).sum / scoreList.length + log(increMap.getOrElse(mid, 1)) - log(decreMap.getOrElse(mid, 1)))
    }.toArray.sortWith(_._2 > _._2)
  }
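  // In formula form, the aggregation above computes, for each candidate movie q:
  //
  //   E(u,q) = (1 / |RK|) * sum over r in RK of sim(q,r) * R(r)
  //            + lg(incount(q)) - lg(recount(q))
  //
  // where RK is the set of the user's recent ratings with sim(q,r) > 0.7, R(r) is the
  // rating value, incount/recount are how many of those recent ratings lie above/below
  // 3, and lg is the base-10 logarithm implemented by log() below. A high-rated similar
  // movie therefore boosts the candidate, a low-rated one weakens it.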

  /**
   * Look up the similarity between two movies in the precomputed similarity matrix
   * (the broadcast Map-of-Maps structure makes this lookup cheap)
   * @param mid1
   * @param mid2
   * @param simMovies
   * @return
   */
  def getMoviesSimScore(mid1: Int, mid2: Int, simMovies: scala.collection.Map[Int,
    scala.collection.immutable.Map[Int, Double]]): Double = {

    simMovies.get(mid1) match {
      case Some(sims) => sims.get(mid2) match {
        case Some(score) => score
        case None => 0.0
      }
      case None => 0.0
    }
  }

  /**
   * Base-10 logarithm
   * @param m
   * @return
   */
  def log(m: Int): Double = {
    val N = 10
    math.log(m) / math.log(N)
  }

  def saveDataToMongoDB(uid: Int, streamRecs: Array[(Int, Double)])(implicit mongoConfig: MongoConfig): Unit = {
    // Handle to the StreamRecs collection
    val streamRecsCollection = ConnHelper.mongoClient(mongoConfig.db)(MONGODB_STREAM_RECS_COLLECTION)

    // If the collection already holds data for this uid, remove it
    streamRecsCollection.findAndRemove(MongoDBObject("uid" -> uid))
    // Insert the streamRecs data
    streamRecsCollection.insert(MongoDBObject("uid" -> uid,
      "recs" -> streamRecs.map(x => MongoDBObject("mid" -> x._1, "score" -> x._2))))
  }

}
--------------------------------------------------------------------------------
/Recommendation/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<!-- The XML tags of this file were stripped in the dump; the element nesting below is
     reconstructed from the surviving values and standard Maven conventions. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>RecommendationSystem</artifactId>
        <groupId>com.z</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>

    <modelVersion>4.0.0</modelVersion>
    <artifactId>Recommendation</artifactId>
    <packaging>pom</packaging>

    <modules>
        <module>StatisticsRecommendation</module>
        <module>OfflineRecommendation</module>
        <module>StreamingRecommendation</module>
        <module>ContentRecommendation</module>
        <module>KafkaStream</module>
        <module>ItemCFRecommendation</module>
    </modules>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- version and executions are inherited from the parent's pluginManagement -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<!-- As above, the XML tags were stripped in the dump and are reconstructed here. Only
     log4j.version, slf4j.version, log4j-slf4j.version and spark.version can be confirmed
     from their usage elsewhere in the build; the other property names are plausible
     guesses matched to the surviving version values. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.z</groupId>
    <artifactId>RecommendationSystem</artifactId>
    <packaging>pom</packaging>
    <version>1.0-SNAPSHOT</version>

    <modules>
        <module>Recommendation</module>
    </modules>

    <properties>
        <log4j.version>1.2.17</log4j.version>
        <slf4j.version>1.7.22</slf4j.version>
        <log4j-slf4j.version>2.8.2</log4j-slf4j.version>
        <!-- property names below are reconstructed guesses; the values are from the source -->
        <mongodb-spark.version>2.0.0</mongodb-spark.version>
        <casbah.version>3.1.1</casbah.version>
        <elasticsearch-spark.version>6.3.1</elasticsearch-spark.version>
        <elasticsearch.version>6.3.1</elasticsearch.version>
        <redis.version>2.9.0</redis.version>
        <kafka.version>0.11.0.2</kafka.version>
        <spark.version>2.1.1</spark.version>
        <scala.version>2.11.8</scala.version>
        <jblas.version>1.2.1</jblas.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>jcl-over-slf4j</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-to-slf4j</artifactId>
            <version>${log4j-slf4j.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>utf8</encoding>
                </configuration>
            </plugin>
        </plugins>

        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <version>3.0.0</version>
                    <executions>
                        <execution>
                            <id>make-assembly</id>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                    <executions>
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>testCompile</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>
--------------------------------------------------------------------------------
/readme.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wolf-song-ml/RecommendationSystem/6a459d6472131d697d6cf450a249caa3d6a465e4/readme.docx
--------------------------------------------------------------------------------
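A note on building (illustrative, based on the assembly configuration above and the
jar path referenced by setJars in StreamingRecommender): packaging the project should
produce the fat jars that each driver ships to the cluster, assuming the submodules
opt in to the managed maven-assembly-plugin with the jar-with-dependencies descriptor
(the submodule poms are not shown in this section).

```bash
# From the repository root: compile all modules and build the assembly jars.
# The assembly plugin's "single" goal is bound to the package phase, so each
# module that activates it emits <module>-jar-with-dependencies.jar under target/.
mvn clean package -DskipTests
```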