├── .gitignore
├── .mvn
└── wrapper
│ ├── maven-wrapper.jar
│ └── maven-wrapper.properties
├── README.md
├── data
├── README.md
├── news.sql
├── user.sql
└── useroperation.sql
├── mvnw
├── mvnw.cmd
├── pom.xml
└── src
├── main
├── java
│ └── com
│ │ └── kadoufall
│ │ └── recommender
│ │ ├── RecommenderApplication.java
│ │ ├── dao
│ │ ├── NewsRepository.java
│ │ ├── UserOperationRepository.java
│ │ ├── UserRepository.java
│ │ ├── UtilRepositoryCustom.java
│ │ └── UtilRepositoryImpl.java
│ │ ├── model
│ │ ├── News.java
│ │ ├── User.java
│ │ └── UserOperation.java
│ │ ├── service
│ │ ├── contentBased
│ │ │ ├── ContentBasedRecommenderService.java
│ │ │ ├── ContentBasedRecommenderServiceImpl.java
│ │ │ ├── NewsItemSimilarity.java
│ │ │ └── TrainVsmModel.java
│ │ ├── hot
│ │ │ ├── HotRecommenderService.java
│ │ │ └── HotRecommenderServiceImpl.java
│ │ ├── userCF
│ │ │ ├── UserCFRecommenderService.java
│ │ │ └── UserCFRecommenderServiceImpl.java
│ │ └── util
│ │ │ ├── UtilService.java
│ │ │ └── UtilServiceImpl.java
│ │ └── web
│ │ └── RecommendController.java
└── resources
│ ├── README.md
│ ├── application.properties
│ ├── caixin.base
│ ├── caixinWords.txt
│ ├── log4j.properties
│ └── stopword.txt
└── test
└── java
└── com
└── kadoufall
└── recommender
├── RecommenderApplicationTests.java
├── dao
└── DomainTest.java
├── recommender
├── EvaluateTest.java
└── MahoutTest.java
└── service
├── ContentBasedRecommenderServiceTest.java
└── HotRecommenderServiceTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | !.mvn/wrapper/maven-wrapper.jar
3 |
4 | ### STS ###
5 | .apt_generated
6 | .classpath
7 | .factorypath
8 | .project
9 | .settings
10 | .springBeans
11 | .sts4-cache
12 |
13 | ### IntelliJ IDEA ###
14 | .idea
15 | *.iws
16 | *.iml
17 | *.ipr
18 |
19 | ### NetBeans ###
20 | /nbproject/private/
21 | /build/
22 | /nbbuild/
23 | /dist/
24 | /nbdist/
25 | /.nb-gradle/
26 |
27 | /logs/*
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kadoufall/news-recommender/ccecbe6b918ebe537442976dbb6fbc17aa92f958/.mvn/wrapper/maven-wrapper.jar
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.5.3/apache-maven-3.5.3-bin.zip
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 基于Mahout的新闻推荐系统
2 |
3 | ## 相关技术
4 |
5 | - 推荐算法
6 | - 基于用户的协同过滤
7 | - 基于内容的推荐
8 | - 基于热点的推荐
9 |
10 | - [Mahout](http://mahout.apache.org/):整体框架,实现了协同过滤
11 | - [Deeplearning4j](https://deeplearning4j.org/):构建VSM
12 | - [Jieba](https://github.com/fxsjy/jieba):分词,关键词提取
13 | - [HanLP](https://github.com/hankcs/HanLP):分词,关键词提取
14 | - [Spring Boot](https://spring.io/):提供API、ORM
15 |
16 |
17 | ## 关键实现
18 |
19 | ### 基于用户的协同过滤
20 | - 直接调用Mahout相关接口即可
21 | - 选择不同的用户相似度度量方法,这里选择了基于谷本系数、基于对数似然和基于曼哈顿距离
22 |
23 | ### 基于内容的推荐
24 | - 对新闻文本进行分词
25 | - 调用Deeplearning4j中构建paragraphvector的方法,通过doc2vec构建VSM
26 | - 用Gensim会更方便点
27 |
28 |
29 | ### 基于热点的推荐
30 | - 统计最高浏览量
31 | - 过滤一定时间前的新闻保证热点的准确
32 |
33 | ## 评测指标
34 | - [测试数据集](https://pan.baidu.com/s/1Y84iLIY8RbO_6oFTEm1oGA#list/path=%2F)
35 | - F1-Measure(precision 与 recall 的调和平均:F1 = 2PR / (P + R))
36 |
37 | |算法|最近邻数量K|推荐数N|F1-Measure|
38 | |:---:|:---:|:---:|:---:|
39 | |UserCF--Tanimoto|20|11|0.481591183699049|
40 | |UserCF--LogLike|10|11|0.486337485027766|
41 | |UserCF--CityBlock|30|8|0.424612102745937|
42 | |ContentBased|-|5|0.0491655390166893|
43 | |HotSpots|-|14|0.118524972063865|
44 |
45 |
46 |
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | ## 预处理后的 MySQL 结构和数据
2 |
3 | - news
4 | - user
5 | - userOperation
--------------------------------------------------------------------------------
/mvnw:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # ----------------------------------------------------------------------------
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | # ----------------------------------------------------------------------------
20 |
21 | # ----------------------------------------------------------------------------
22 | # Maven2 Start Up Batch script
23 | #
24 | # Required ENV vars:
25 | # ------------------
26 | # JAVA_HOME - location of a JDK home dir
27 | #
28 | # Optional ENV vars
29 | # -----------------
30 | # M2_HOME - location of maven2's installed home dir
31 | # MAVEN_OPTS - parameters passed to the Java VM when running Maven
32 | # e.g. to debug Maven itself, use
33 | # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
34 | # MAVEN_SKIP_RC - flag to disable loading of mavenrc files
35 | # ----------------------------------------------------------------------------
36 |
37 | if [ -z "$MAVEN_SKIP_RC" ] ; then
38 |
39 | if [ -f /etc/mavenrc ] ; then
40 | . /etc/mavenrc
41 | fi
42 |
43 | if [ -f "$HOME/.mavenrc" ] ; then
44 | . "$HOME/.mavenrc"
45 | fi
46 |
47 | fi
48 |
49 | # OS specific support. $var _must_ be set to either true or false.
50 | cygwin=false;
51 | darwin=false;
52 | mingw=false
53 | case "`uname`" in
54 | CYGWIN*) cygwin=true ;;
55 | MINGW*) mingw=true;;
56 | Darwin*) darwin=true
57 | # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
58 | # See https://developer.apple.com/library/mac/qa/qa1170/_index.html
59 | if [ -z "$JAVA_HOME" ]; then
60 | if [ -x "/usr/libexec/java_home" ]; then
61 | export JAVA_HOME="`/usr/libexec/java_home`"
62 | else
63 | export JAVA_HOME="/Library/Java/Home"
64 | fi
65 | fi
66 | ;;
67 | esac
68 |
69 | if [ -z "$JAVA_HOME" ] ; then
70 | if [ -r /etc/gentoo-release ] ; then
71 | JAVA_HOME=`java-config --jre-home`
72 | fi
73 | fi
74 |
75 | if [ -z "$M2_HOME" ] ; then
76 | ## resolve links - $0 may be a link to maven's home
77 | PRG="$0"
78 |
79 | # need this for relative symlinks
80 | while [ -h "$PRG" ] ; do
81 | ls=`ls -ld "$PRG"`
82 | link=`expr "$ls" : '.*-> \(.*\)$'`
83 | if expr "$link" : '/.*' > /dev/null; then
84 | PRG="$link"
85 | else
86 | PRG="`dirname "$PRG"`/$link"
87 | fi
88 | done
89 |
90 | saveddir=`pwd`
91 |
92 | M2_HOME=`dirname "$PRG"`/..
93 |
94 | # make it fully qualified
95 | M2_HOME=`cd "$M2_HOME" && pwd`
96 |
97 | cd "$saveddir"
98 | # echo Using m2 at $M2_HOME
99 | fi
100 |
101 | # For Cygwin, ensure paths are in UNIX format before anything is touched
102 | if $cygwin ; then
103 | [ -n "$M2_HOME" ] &&
104 | M2_HOME=`cygpath --unix "$M2_HOME"`
105 | [ -n "$JAVA_HOME" ] &&
106 | JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
107 | [ -n "$CLASSPATH" ] &&
108 | CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
109 | fi
110 |
111 | # For MinGW, ensure paths are in UNIX format before anything is touched
112 | if $mingw ; then
113 | [ -n "$M2_HOME" ] &&
114 | M2_HOME="`(cd "$M2_HOME"; pwd)`"
115 | [ -n "$JAVA_HOME" ] &&
116 | JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
117 | # TODO classpath?
118 | fi
119 |
120 | if [ -z "$JAVA_HOME" ]; then
121 | javaExecutable="`which javac`"
122 | if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
123 | # readlink(1) is not available as standard on Solaris 10.
124 | readLink=`which readlink`
125 | if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
126 | if $darwin ; then
127 | javaHome="`dirname \"$javaExecutable\"`"
128 | javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
129 | else
130 | javaExecutable="`readlink -f \"$javaExecutable\"`"
131 | fi
132 | javaHome="`dirname \"$javaExecutable\"`"
133 | javaHome=`expr "$javaHome" : '\(.*\)/bin'`
134 | JAVA_HOME="$javaHome"
135 | export JAVA_HOME
136 | fi
137 | fi
138 | fi
139 |
140 | if [ -z "$JAVACMD" ] ; then
141 | if [ -n "$JAVA_HOME" ] ; then
142 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
143 | # IBM's JDK on AIX uses strange locations for the executables
144 | JAVACMD="$JAVA_HOME/jre/sh/java"
145 | else
146 | JAVACMD="$JAVA_HOME/bin/java"
147 | fi
148 | else
149 | JAVACMD="`which java`"
150 | fi
151 | fi
152 |
153 | if [ ! -x "$JAVACMD" ] ; then
154 | echo "Error: JAVA_HOME is not defined correctly." >&2
155 | echo " We cannot execute $JAVACMD" >&2
156 | exit 1
157 | fi
158 |
159 | if [ -z "$JAVA_HOME" ] ; then
160 | echo "Warning: JAVA_HOME environment variable is not set."
161 | fi
162 |
163 | CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher
164 |
165 | # traverses directory structure from process work directory to filesystem root
166 | # first directory with .mvn subdirectory is considered project base directory
167 | find_maven_basedir() {
168 |   # $1: directory to start from. Prints the nearest directory (starting at $1, walking up) that contains .mvn; falls back to $1.
169 |   if [ -z "$1" ]
170 |   then
171 |     echo "Path not specified to find_maven_basedir" >&2  # stderr: the caller captures stdout as BASE_DIR
172 |     return 1
173 |   fi
174 |
175 |   basedir="$1"
176 |   wdir="$1"
177 |   while [ "$wdir" != '/' ] ; do
178 |     if [ -d "$wdir"/.mvn ] ; then
179 |       basedir=$wdir
180 |       break
181 |     fi
182 |     # workaround for JBEAP-8937 (on Solaris 10/Sparc)
183 |     if [ -d "${wdir}" ]; then
184 |       wdir=`cd "$wdir/.."; pwd`
185 |     fi
186 |     # end of workaround
187 |   done
188 |   echo "${basedir}"
189 | }
190 |
191 | # concatenates all lines of a file
192 | concat_lines() { # $1: file path; prints the file content on one line, or nothing when $1 is not a regular file
193 | if [ -f "$1" ]; then
194 | echo "$(tr -s '\n' ' ' < "$1")" # tr turns newlines into spaces and squeezes repeated spaces
195 | fi
196 | }
197 |
198 | BASE_DIR=`find_maven_basedir "$(pwd)"`
199 | if [ -z "$BASE_DIR" ]; then
200 | exit 1;
201 | fi
202 |
203 | export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
204 | echo $MAVEN_PROJECTBASEDIR
205 | MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
206 |
207 | # For Cygwin, switch paths to Windows format before running java
208 | if $cygwin; then
209 | [ -n "$M2_HOME" ] &&
210 | M2_HOME=`cygpath --path --windows "$M2_HOME"`
211 | [ -n "$JAVA_HOME" ] &&
212 | JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
213 | [ -n "$CLASSPATH" ] &&
214 | CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
215 | [ -n "$MAVEN_PROJECTBASEDIR" ] &&
216 | MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
217 | fi
218 |
219 | WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
220 |
221 | exec "$JAVACMD" \
222 | $MAVEN_OPTS \
223 | -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
224 | "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
225 | ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"
226 |
--------------------------------------------------------------------------------
/mvnw.cmd:
--------------------------------------------------------------------------------
1 | @REM ----------------------------------------------------------------------------
2 | @REM Licensed to the Apache Software Foundation (ASF) under one
3 | @REM or more contributor license agreements. See the NOTICE file
4 | @REM distributed with this work for additional information
5 | @REM regarding copyright ownership. The ASF licenses this file
6 | @REM to you under the Apache License, Version 2.0 (the
7 | @REM "License"); you may not use this file except in compliance
8 | @REM with the License. You may obtain a copy of the License at
9 | @REM
10 | @REM http://www.apache.org/licenses/LICENSE-2.0
11 | @REM
12 | @REM Unless required by applicable law or agreed to in writing,
13 | @REM software distributed under the License is distributed on an
14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | @REM KIND, either express or implied. See the License for the
16 | @REM specific language governing permissions and limitations
17 | @REM under the License.
18 | @REM ----------------------------------------------------------------------------
19 |
20 | @REM ----------------------------------------------------------------------------
21 | @REM Maven2 Start Up Batch script
22 | @REM
23 | @REM Required ENV vars:
24 | @REM JAVA_HOME - location of a JDK home dir
25 | @REM
26 | @REM Optional ENV vars
27 | @REM M2_HOME - location of maven2's installed home dir
28 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
29 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending
30 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
31 | @REM e.g. to debug Maven itself, use
32 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
33 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
34 | @REM ----------------------------------------------------------------------------
35 |
36 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
37 | @echo off
38 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
39 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%
40 |
41 | @REM set %HOME% to equivalent of $HOME
42 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
43 |
44 | @REM Execute a user defined script before this one
45 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
46 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending
47 | if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
48 | if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
49 | :skipRcPre
50 |
51 | @setlocal
52 |
53 | set ERROR_CODE=0
54 |
55 | @REM To isolate internal variables from possible post scripts, we use another setlocal
56 | @setlocal
57 |
58 | @REM ==== START VALIDATION ====
59 | if not "%JAVA_HOME%" == "" goto OkJHome
60 |
61 | echo.
62 | echo Error: JAVA_HOME not found in your environment. >&2
63 | echo Please set the JAVA_HOME variable in your environment to match the >&2
64 | echo location of your Java installation. >&2
65 | echo.
66 | goto error
67 |
68 | :OkJHome
69 | if exist "%JAVA_HOME%\bin\java.exe" goto init
70 |
71 | echo.
72 | echo Error: JAVA_HOME is set to an invalid directory. >&2
73 | echo JAVA_HOME = "%JAVA_HOME%" >&2
74 | echo Please set the JAVA_HOME variable in your environment to match the >&2
75 | echo location of your Java installation. >&2
76 | echo.
77 | goto error
78 |
79 | @REM ==== END VALIDATION ====
80 |
81 | :init
82 |
83 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
84 | @REM Fallback to current working directory if not found.
85 |
86 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
87 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
88 |
89 | set EXEC_DIR=%CD%
90 | set WDIR=%EXEC_DIR%
91 | :findBaseDir
92 | IF EXIST "%WDIR%"\.mvn goto baseDirFound
93 | cd ..
94 | IF "%WDIR%"=="%CD%" goto baseDirNotFound
95 | set WDIR=%CD%
96 | goto findBaseDir
97 |
98 | :baseDirFound
99 | set MAVEN_PROJECTBASEDIR=%WDIR%
100 | cd "%EXEC_DIR%"
101 | goto endDetectBaseDir
102 |
103 | :baseDirNotFound
104 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
105 | cd "%EXEC_DIR%"
106 |
107 | :endDetectBaseDir
108 |
109 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
110 |
111 | @setlocal EnableExtensions EnableDelayedExpansion
112 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
113 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
114 |
115 | :endReadAdditionalConfig
116 |
117 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
118 |
119 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
120 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
121 |
122 | %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
123 | if ERRORLEVEL 1 goto error
124 | goto end
125 |
126 | :error
127 | set ERROR_CODE=1
128 |
129 | :end
130 | @endlocal & set ERROR_CODE=%ERROR_CODE%
131 |
132 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
133 | @REM check for post script, once with legacy .bat ending and once with .cmd ending
134 | if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
135 | if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
136 | :skipRcPost
137 |
138 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
139 | if "%MAVEN_BATCH_PAUSE%" == "on" pause
140 |
141 | if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%
142 |
143 | exit /B %ERROR_CODE%
144 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.kadoufall
7 | recommender
8 | 0.0.1-SNAPSHOT
9 | jar
10 |
11 | recommender
12 | Recommender for Todayim
13 |
14 |
15 | org.springframework.boot
16 | spring-boot-starter-parent
17 | 2.0.1.RELEASE
18 |
19 |
20 |
21 |
22 | UTF-8
23 | UTF-8
24 | 1.8
25 |
26 |
27 |
28 |
29 | org.springframework.boot
30 | spring-boot-starter
31 |
32 |
33 | org.springframework.boot
34 | spring-boot-starter-logging
35 |
36 |
37 |
38 |
39 |
40 | org.springframework.boot
41 | spring-boot-starter-log4j
42 |
43 |
44 |
45 |
46 | org.springframework.boot
47 | spring-boot-starter-data-jpa
48 |
49 |
50 | org.springframework.boot
51 | spring-boot-starter-integration
52 |
53 |
54 | org.springframework.boot
55 | spring-boot-starter-jdbc
56 |
57 |
58 | org.springframework.boot
59 | spring-boot-starter-web
60 |
61 |
62 |
63 | org.springframework.boot
64 | spring-boot-devtools
65 | runtime
66 |
67 |
68 |
69 | mysql
70 | mysql-connector-java
71 | runtime
72 |
73 |
74 |
75 | org.springframework.boot
76 | spring-boot-starter-test
77 | test
78 |
79 |
80 |
81 | org.springframework.boot
82 | spring-boot-configuration-processor
83 | true
84 |
85 |
86 |
87 | org.projectlombok
88 | lombok
89 |
90 |
91 |
92 |
93 | org.apache.mahout
94 | mahout-core
95 | 0.9
96 |
97 |
98 |
99 |
100 | org.apache.mahout
101 | mahout-math
102 | 0.13.0
103 |
104 |
105 |
106 |
107 | org.apache.mahout
108 | mahout-integration
109 | 0.13.0
110 |
111 |
112 |
113 | com.alibaba
114 | fastjson
115 | 1.2.46
116 |
117 |
118 |
119 | org.ansj
120 | ansj_seg
121 | 5.1.1
122 |
123 |
124 |
125 |
126 | org.deeplearning4j
127 | deeplearning4j-core
128 | 1.0.0-alpha
129 |
130 |
131 |
132 |
133 | org.deeplearning4j
134 | deeplearning4j-nlp
135 | 1.0.0-alpha
136 |
137 |
138 |
139 |
140 | org.deeplearning4j
141 | deeplearning4j-nn
142 | 1.0.0-alpha
143 |
144 |
145 |
146 |
147 | org.deeplearning4j
148 | arbiter-core
149 | 1.0.0-alpha
150 |
151 |
152 |
153 |
154 | org.deeplearning4j
155 | rl4j-api
156 | 1.0.0-alpha
157 |
158 |
159 |
160 |
161 | org.deeplearning4j
162 | deeplearning4j-modelimport
163 | 1.0.0-alpha
164 |
165 |
166 |
167 |
168 | org.deeplearning4j
169 | deeplearning4j-datavec-iterators
170 | 1.0.0-alpha
171 |
172 |
173 |
174 |
175 | org.nd4j
176 | nd4j-api
177 | 1.0.0-alpha
178 |
179 |
180 |
181 |
182 | org.nd4j
183 | nd4j-x86
184 | 0.4-rc3.8
185 | test
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 | org.bytedeco
199 | javacpp
200 | 1.4.1
201 |
202 |
203 |
204 |
205 | org.apache.poi
206 | poi
207 | 3.17
208 |
209 |
210 |
211 |
212 | org.apache.poi
213 | poi-ooxml
214 | 3.17
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 | org.springframework.boot
225 | spring-boot-maven-plugin
226 |
227 |
228 |
229 |
230 |
231 |
232 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/RecommenderApplication.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 |
6 | @SpringBootApplication // marks this class as the Spring Boot configuration root
7 | public class RecommenderApplication { // application entry point
8 |
9 | public static void main(String[] args) { // args: command-line arguments, forwarded unchanged to Spring
10 | SpringApplication.run(RecommenderApplication.class, args); // starts the Spring application
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/dao/NewsRepository.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.dao;
2 |
3 | import com.kadoufall.recommender.model.News;
4 | import org.springframework.data.domain.Pageable;
5 | import org.springframework.data.jpa.repository.JpaRepository;
6 | import org.springframework.data.jpa.repository.Query;
7 | import org.springframework.data.repository.query.Param;
8 |
9 | import java.util.List;
10 |
11 | /**
12 |  * Spring Data JPA repository for {@link News} entities, keyed by their {@code long} id.
13 |  */
14 | public interface NewsRepository extends JpaRepository<News, Long> {
15 |
16 |     /**
17 |      * Looks up a single news item by primary key.
18 |      *
19 |      * @param id primary key of the news item
20 |      * @return the matching news item
21 |      */
22 |     News findById(long id);
23 |
24 |     /**
25 |      * Returns all news items ordered by {@code viewNum}, highest first.
26 |      * The query is unbounded, so callers that need only the top entries must truncate the list.
27 |      *
28 |      * @return every news item, most-viewed first
29 |      */
30 |     @Query("from News ORDER BY viewNum DESC")
31 |     List<News> findHostestNews();
32 | }
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/dao/UserOperationRepository.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.dao;
2 |
3 | import com.kadoufall.recommender.model.UserOperation;
4 | import org.springframework.data.jpa.repository.JpaRepository;
5 |
6 | import java.util.List;
7 |
8 | /**
9 |  * Spring Data JPA repository for {@link UserOperation} rows, keyed by their {@code long} id.
10 |  */
11 | public interface UserOperationRepository extends JpaRepository<UserOperation, Long> {
12 |
13 |     /**
14 |      * Looks up a single operation by primary key.
15 |      *
16 |      * @param id primary key of the operation
17 |      * @return the matching operation
18 |      */
19 |     UserOperation findById(long id);
20 |
21 |     /**
22 |      * Finds every operation whose {@code userId} property equals the given value.
23 |      *
24 |      * @param userId id of the user
25 |      * @return the user's operations
26 |      */
27 |     List<UserOperation> findByUserId(long userId);
28 | }
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/dao/UserRepository.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.dao;
2 |
3 | import com.kadoufall.recommender.model.User;
4 | import org.springframework.data.jpa.repository.JpaRepository;
5 | import org.springframework.data.jpa.repository.Query;
6 | import org.springframework.data.repository.query.Param;
7 |
8 | /**
9 |  * Spring Data JPA repository for {@link User} entities, keyed by their {@code long} id.
10 |  */
11 | public interface UserRepository extends JpaRepository<User, Long> {
12 |
13 |     /**
14 |      * Looks up a user by primary key using a derived query.
15 |      *
16 |      * @param id primary key of the user
17 |      * @return the matching user
18 |      */
19 |     User findById(long id);
20 |
21 |     /**
22 |      * Looks up a user by primary key using an explicit JPQL query.
23 |      * Selects on the same {@code id} criterion as {@link #findById(long)}.
24 |      *
25 |      * @param id primary key of the user
26 |      * @return the matching user
27 |      */
28 |     @Query("from User u where u.id=:id")
29 |     User findUser(@Param("id") long id);
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/dao/UtilRepositoryCustom.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.dao;
2 |
3 | import com.kadoufall.recommender.model.News;
4 | import org.springframework.stereotype.Repository;
5 |
6 | import java.sql.Timestamp;
7 | import java.util.List;
8 |
9 | /**
10 |  * Helper queries that answer "which news ids does a user have operations for".
11 |  */
12 | public interface UtilRepositoryCustom {
13 |     /**
14 |      * Returns the news ids of all operations recorded for the user.
15 |      *
16 |      * @param userId id of the user
17 |      * @return news ids, one per recorded operation
18 |      */
19 |     List<Long> findSeenNewsIds(long userId);
20 |
21 |     /**
22 |      * Returns the news ids of the user's operations that happened before the given instant.
23 |      *
24 |      * @param userId    id of the user
25 |      * @param timestamp exclusive upper bound on the operation's read time
26 |      * @return news ids of the matching operations
27 |      */
28 |     List<Long> findSeenNewsIdsBefore(long userId, Timestamp timestamp);
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/dao/UtilRepositoryImpl.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.dao;
2 |
3 | import com.kadoufall.recommender.model.UserOperation;
4 | import org.springframework.beans.factory.annotation.Autowired;
5 | import org.springframework.stereotype.Repository;
6 |
7 | import java.sql.Timestamp;
8 | import java.util.List;
9 | import java.util.stream.Collectors;
10 |
11 |
12 | /**
13 |  * {@link UtilRepositoryCustom} implementation that derives "seen news" id lists from the
14 |  * operations returned by {@link UserOperationRepository#findByUserId(long)}.
15 |  */
16 | @Repository
17 | public class UtilRepositoryImpl implements UtilRepositoryCustom {
18 |     @Autowired
19 |     private UserOperationRepository userOperationRepository;
20 |
21 |     /**
22 |      * Collects the news id of every operation recorded for the user.
23 |      *
24 |      * @param userId id of the user whose operations are read
25 |      * @return news ids in the order the repository returned the operations; duplicates are kept
26 |      */
27 |     @Override
28 |     public List<Long> findSeenNewsIds(long userId) {
29 |         List<UserOperation> userOperations = this.userOperationRepository.findByUserId(userId);
30 |         // A sequential stream produces the same ordered list as a parallel one, without
31 |         // fork/join overhead for what is only a cheap getter mapping.
32 |         return userOperations.stream()
33 |                 .map(UserOperation::getNewsId)
34 |                 .collect(Collectors.toList());
35 |     }
36 |
37 |     /**
38 |      * Collects the news ids of the user's operations whose read time is strictly before
39 |      * {@code timestamp}.
40 |      *
41 |      * @param userId    id of the user whose operations are read
42 |      * @param timestamp exclusive upper bound on the read time
43 |      * @return matching news ids; operations that have no read time are skipped
44 |      */
45 |     @Override
46 |     public List<Long> findSeenNewsIdsBefore(long userId, Timestamp timestamp) {
47 |         List<UserOperation> userOperations = this.userOperationRepository.findByUserId(userId);
48 |         return userOperations.stream()
49 |                 // readTime is an object field and may be null; skip such rows instead of throwing
50 |                 .filter(u -> u.getReadTime() != null && u.getReadTime().before(timestamp))
51 |                 .map(UserOperation::getNewsId)
52 |                 .collect(Collectors.toList());
53 |     }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/model/News.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.model;
2 |
3 | import lombok.Data;
4 |
5 | import javax.persistence.*;
6 | import java.sql.Timestamp;
7 |
8 | @Entity // JPA-managed persistent class
9 | @Table(name = "news") // mapped to the "news" table
10 | @Data // Lombok: generates getters, setters, equals/hashCode and toString
11 | public class News {
12 | @Id // primary key
13 | @GeneratedValue(strategy = GenerationType.IDENTITY) // key value is produced by the database identity column
14 | @Column(name = "id")
15 | private long id;
16 |
17 | @Column(name = "title")
18 | private String title;
19 |
20 | @Lob // stored as a large object rather than a bounded-length string column
21 | @Column(name = "passageContent")
22 | private String passageContent;
23 |
24 | @Column(name = "postTime")
25 | private Timestamp postTime;
26 |
27 | @Column(name = "viewNum")
28 | private int viewNum; // NewsRepository.findHostestNews sorts on this property, descending
29 |
30 | @Column(name = "commentNum")
31 | private int commentNum;
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/model/User.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.model;
2 |
3 | import lombok.Data;
4 |
5 | import javax.persistence.*;
6 |
7 | @Entity // JPA-managed persistent class
8 | @Table(name = "user") // mapped to the "user" table
9 | @Data // Lombok: generates getters, setters, equals/hashCode and toString
10 | public class User {
11 | @Id // primary key; the only mapped attribute of this entity
12 | @GeneratedValue(strategy = GenerationType.IDENTITY) // key value is produced by the database identity column
13 | @Column(name = "id")
14 | private long id;
15 |
16 | public User() { // explicit no-argument constructor, as JPA requires one
17 |
18 | }
19 |
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/model/UserOperation.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.model;
2 |
3 | import lombok.Data;
4 | import lombok.Getter;
5 |
6 | import javax.persistence.*;
7 | import java.sql.Timestamp;
8 |
9 | @Entity // JPA-managed persistent class
10 | @Table(name = "userOperation") // mapped to the "userOperation" table
11 | @Data // Lombok: generates getters, setters, equals/hashCode and toString
12 | public class UserOperation {
13 | @Id // primary key
14 | @GeneratedValue(strategy = GenerationType.IDENTITY) // key value is produced by the database identity column
15 | @Column(name = "id")
16 | private long id;
17 |
18 | @Column(name = "userId")
19 | private long userId; // queried by UserOperationRepository.findByUserId
20 |
21 | @Column(name = "newsId")
22 | private long newsId; // value collected by UtilRepositoryImpl into the "seen news" lists
23 |
24 | @Column(name = "readTime")
25 | private Timestamp readTime; // compared against a cut-off in UtilRepositoryImpl.findSeenNewsIdsBefore
26 |
27 | @Column(name = "preference")
28 | private float preference;
29 |
30 | public UserOperation() { // explicit no-argument constructor, as JPA requires one
31 |
32 | }
33 | }
34 |
35 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/service/contentBased/ContentBasedRecommenderService.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.service.contentBased;
2 |
3 | import java.util.List;
4 | import java.util.Map;
5 |
6 | /**
7 |  * Content-based news recommendation API.
8 |  */
9 | public interface ContentBasedRecommenderService {
10 |     /**
11 |      * Recommends news items for a user.
12 |      *
13 |      * @param userId id of the user to recommend for
14 |      * @return ids of the recommended news items; empty when nothing could be recommended
15 |      */
16 |     List<Long> recommend(long userId);
17 |
18 |     /**
19 |      * Runs an evaluation of the recommender and returns its results as a map.
20 |      * NOTE(review): the map's key/value types are not visible here, so the raw type is kept;
21 |      * confirm against the implementation before parameterizing it.
22 |      *
23 |      * @return evaluation results
24 |      */
25 |     Map evaluate();
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/com/kadoufall/recommender/service/contentBased/ContentBasedRecommenderServiceImpl.java:
--------------------------------------------------------------------------------
1 | package com.kadoufall.recommender.service.contentBased;
2 |
3 | import com.kadoufall.recommender.dao.UserOperationRepository;
4 | import lombok.extern.log4j.Log4j;
5 | import org.apache.mahout.cf.taste.common.TasteException;
6 | import org.apache.mahout.cf.taste.eval.DataModelBuilder;
7 | import org.apache.mahout.cf.taste.eval.IRStatistics;
8 | import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
9 | import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
10 | import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
11 | import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
12 | import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
13 | import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLBooleanPrefJDBCDataModel;
14 | import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
15 | import org.apache.mahout.cf.taste.model.DataModel;
16 | import org.apache.mahout.cf.taste.recommender.RecommendedItem;
17 | import org.apache.mahout.cf.taste.recommender.Recommender;
18 | import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
19 | import org.datavec.api.util.ClassPathResource;
20 | import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
21 | import org.deeplearning4j.models.paragraphvectors.ParagraphVectors;
22 | import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor;
23 | import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
24 | import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
25 | import org.springframework.beans.factory.annotation.Autowired;
26 | import org.springframework.beans.factory.annotation.Value;
27 | import org.springframework.stereotype.Service;
28 |
29 | import javax.annotation.PostConstruct;
30 | import javax.sql.DataSource;
31 | import java.io.File;
32 | import java.io.FileOutputStream;
33 | import java.io.OutputStreamWriter;
34 | import java.util.*;
35 | import java.util.stream.Collectors;
36 | import java.util.stream.IntStream;
37 |
38 | @Log4j
39 | @Service
40 | public class ContentBasedRecommenderServiceImpl implements ContentBasedRecommenderService {
41 | @Value("${com.kadoufall.mahout.table-name}")
42 | private String tableName;
43 |
44 | @Value("${com.kadoufall.mahout.user-column}")
45 | private String userColumn;
46 |
47 | @Value("${com.kadoufall.mahout.item-column}")
48 | private String itemColumn;
49 |
50 | @Value("${com.kadoufall.mahout.pref-column}")
51 | private String prefColumn;
52 |
53 | @Value("${com.kadoufall.mahout.recommendNum}")
54 | private int recommendNum;
55 |
56 | @Autowired
57 | DataSource dataSource;
58 |
59 | @Autowired
60 | UserOperationRepository userOperationRepository;
61 |
62 | private ParagraphVectors vectors = null;
63 | private DataModel dataModel = null;
64 | private Recommender recommender = null;
65 |
66 | public ContentBasedRecommenderServiceImpl() { // intentionally empty; setup happens in the @PostConstruct init()
67 |
68 | }
69 |
70 | @PostConstruct
71 | public void init() {
72 | try {
73 | this.dataModel = new MySQLBooleanPrefJDBCDataModel(
74 | dataSource,
75 | this.tableName,
76 | this.userColumn,
77 | this.itemColumn,
78 | this.prefColumn
79 | );
80 |
81 | // 注意需要先在 TrainVsmModel 中训练
82 | this.initParagraphVectors();
83 |
84 | // TODO: 移除 dataModel 中过期的用户浏览新闻行为,这些行为对计算用户相似度不再具有较大价值
85 |
86 | ItemSimilarity similarity = new NewsItemSimilarity(this.vectors);
87 | this.recommender = new GenericBooleanPrefItemBasedRecommender(dataModel, similarity);
88 | } catch (Exception e) {
89 | log.error(e.getMessage());
90 | }
91 | }
92 |
93 | /** Loads the serialized paragraph vectors from the classpath resource "/model.pv" and attaches a tokenizer. */
94 | private void initParagraphVectors() throws Exception {
95 |     TokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();
96 |     tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());
97 |     File modelFile = new ClassPathResource("/model.pv").getFile();
98 |     this.vectors = WordVectorSerializer.readParagraphVectors(modelFile);
99 |     this.vectors.setTokenizerFactory(tokenizerFactory);
100 | }
101 | @Override
102 | public List recommend(long userId) {
103 | List ret = new ArrayList<>();
104 |
105 | try {
106 | List recommendedItems = recommender.recommend(userId, this.recommendNum);
107 | ret = recommendedItems.parallelStream().map(RecommendedItem::getItemID).collect(Collectors.toList());
108 | } catch (TasteException e) {
109 | log.error(e.getMessage());
110 | }
111 |
112 | return ret;
113 | }
114 |
115 | @Override
116 | public Map evaluate() {
117 | RecommenderIRStatsEvaluator evaluator =
118 | new GenericRecommenderIRStatsEvaluator();
119 | RecommenderBuilder recommenderBuilder = model1 -> {
120 | ItemSimilarity similarity = new NewsItemSimilarity(this.vectors);
121 | return new GenericBooleanPrefItemBasedRecommender(model1, similarity);
122 | };
123 | DataModelBuilder modelBuilder = trainingData -> new GenericBooleanPrefDataModel(
124 | GenericBooleanPrefDataModel.toDataMap(trainingData));
125 |
126 | List