├── .gitattributes ├── README.md ├── data-analysis ├── data-analysis.iml ├── pom.xml ├── src │ └── main │ │ ├── java │ │ └── com │ │ │ └── crow │ │ │ ├── domain │ │ │ ├── AuxiliaryModels │ │ │ │ └── NameValue.java │ │ │ ├── Comment.java │ │ │ ├── CommentMapper.java │ │ │ ├── CommentWord.java │ │ │ ├── CommentWordMapper.java │ │ │ ├── Post.java │ │ │ ├── PostMapper.java │ │ │ ├── TitleWord.java │ │ │ ├── TitleWordMapper.java │ │ │ ├── User.java │ │ │ └── UserMapper.java │ │ │ ├── service │ │ │ ├── CommentService.java │ │ │ ├── CommentWordService.java │ │ │ ├── PostService.java │ │ │ ├── TitleWordService.java │ │ │ └── UserService.java │ │ │ └── web │ │ │ └── EchartsController.java │ │ ├── resources │ │ ├── com │ │ │ └── crow │ │ │ │ └── domain │ │ │ │ ├── CommentWordMapper.xml │ │ │ │ ├── TitleWordMapper.xml │ │ │ │ └── UserMapper.xml │ │ ├── db.properties │ │ ├── log4j.properties │ │ ├── mybatis │ │ │ └── sqlMapConfig.xml │ │ └── spring │ │ │ ├── applicationContext-dao.xml │ │ │ ├── applicationContext-service.xml │ │ │ ├── applicationContext-transaction.xml │ │ │ └── springmvc.xml │ │ └── webapp │ │ ├── WEB-INF │ │ ├── jsp │ │ │ ├── CommentWords.jsp │ │ │ ├── Gender.jsp │ │ │ ├── ProvinceAddress.jsp │ │ │ ├── TitleWords.jsp │ │ │ ├── Titles.jsp │ │ │ └── Views.jsp │ │ └── web.xml │ │ ├── index.jsp │ │ └── js │ │ ├── echarts-wordcloud.js │ │ ├── echarts-wordcloud.min.js │ │ ├── echarts.common.min.js │ │ ├── jquery-3.2.1.min.js │ │ └── theme │ │ ├── dark.js │ │ ├── infographic.js │ │ ├── macarons.js │ │ ├── roma.js │ │ └── vintage.js └── target │ ├── classes │ ├── com │ │ └── crow │ │ │ ├── domain │ │ │ ├── AuxiliaryModels │ │ │ │ └── NameValue.class │ │ │ ├── Comment.class │ │ │ ├── CommentMapper.class │ │ │ ├── CommentWord.class │ │ │ ├── CommentWordMapper.class │ │ │ ├── CommentWordMapper.xml │ │ │ ├── Post.class │ │ │ ├── PostMapper.class │ │ │ ├── TitleWord.class │ │ │ ├── TitleWordMapper.class │ │ │ ├── TitleWordMapper.xml │ │ │ ├── User.class │ │ │ ├── 
UserMapper.class │ │ │ └── UserMapper.xml │ │ │ ├── service │ │ │ ├── CommentService.class │ │ │ ├── CommentWordService.class │ │ │ ├── PostService.class │ │ │ ├── TitleWordService.class │ │ │ └── UserService.class │ │ │ └── web │ │ │ └── EchartsController.class │ ├── db.properties │ ├── log4j.properties │ ├── mybatis │ │ └── sqlMapConfig.xml │ └── spring │ │ ├── applicationContext-dao.xml │ │ ├── applicationContext-service.xml │ │ ├── applicationContext-transaction.xml │ │ └── springmvc.xml │ └── data-analysis │ ├── META-INF │ └── MANIFEST.MF │ ├── WEB-INF │ ├── classes │ │ ├── com │ │ │ └── crow │ │ │ │ ├── domain │ │ │ │ ├── AuxiliaryModels │ │ │ │ │ └── NameValue.class │ │ │ │ ├── Comment.class │ │ │ │ ├── CommentMapper.class │ │ │ │ ├── CommentWord.class │ │ │ │ ├── CommentWordMapper.class │ │ │ │ ├── CommentWordMapper.xml │ │ │ │ ├── Post.class │ │ │ │ ├── PostMapper.class │ │ │ │ ├── TitleWord.class │ │ │ │ ├── TitleWordMapper.class │ │ │ │ ├── TitleWordMapper.xml │ │ │ │ ├── User.class │ │ │ │ ├── UserMapper.class │ │ │ │ └── UserMapper.xml │ │ │ │ ├── service │ │ │ │ ├── CommentService.class │ │ │ │ ├── CommentWordService.class │ │ │ │ ├── PostService.class │ │ │ │ ├── TitleWordService.class │ │ │ │ └── UserService.class │ │ │ │ └── web │ │ │ │ └── EchartsController.class │ │ ├── db.properties │ │ ├── log4j.properties │ │ ├── mybatis │ │ │ └── sqlMapConfig.xml │ │ └── spring │ │ │ ├── applicationContext-dao.xml │ │ │ ├── applicationContext-service.xml │ │ │ ├── applicationContext-transaction.xml │ │ │ └── springmvc.xml │ ├── jsp │ │ ├── CommentWords.jsp │ │ ├── Gender.jsp │ │ ├── ProvinceAddress.jsp │ │ ├── TitleWords.jsp │ │ ├── Titles.jsp │ │ └── Views.jsp │ ├── lib │ │ ├── ansj_seg-5.1.1.jar │ │ ├── aopalliance-1.0.jar │ │ ├── aspectjweaver-1.8.7.jar │ │ ├── classmate-1.1.0.jar │ │ ├── commons-dbcp-1.4.jar │ │ ├── commons-fileupload-1.3.1.jar │ │ ├── commons-io-2.2.jar │ │ ├── commons-logging-1.2.jar │ │ ├── commons-pool-1.5.4.jar │ │ ├── 
hibernate-validator-5.2.4.Final.jar │ │ ├── jackson-annotations-2.4.0.jar │ │ ├── jackson-core-2.4.3.jar │ │ ├── jackson-databind-2.4.3.jar │ │ ├── javax.servlet-api-3.1.0.jar │ │ ├── jboss-logging-3.2.1.Final.jar │ │ ├── jsp-api-2.2.jar │ │ ├── jstl-1.2.jar │ │ ├── log4j-1.2.17.jar │ │ ├── mybatis-3.3.1.jar │ │ ├── mybatis-spring-1.2.4.jar │ │ ├── mysql-connector-java-5.1.38.jar │ │ ├── nlp-lang-1.7.2.jar │ │ ├── slf4j-api-1.7.18.jar │ │ ├── spring-aop-4.2.4.RELEASE.jar │ │ ├── spring-aspects-4.2.4.RELEASE.jar │ │ ├── spring-beans-4.2.4.RELEASE.jar │ │ ├── spring-context-4.2.4.RELEASE.jar │ │ ├── spring-core-4.2.4.RELEASE.jar │ │ ├── spring-expression-4.2.4.RELEASE.jar │ │ ├── spring-jdbc-4.2.4.RELEASE.jar │ │ ├── spring-orm-4.2.4.RELEASE.jar │ │ ├── spring-test-4.2.4.RELEASE.jar │ │ ├── spring-tx-4.2.4.RELEASE.jar │ │ ├── spring-web-4.2.4.RELEASE.jar │ │ ├── spring-webmvc-4.2.4.RELEASE.jar │ │ ├── standard-1.1.2.jar │ │ └── validation-api-1.1.0.Final.jar │ └── web.xml │ ├── index.jsp │ └── js │ ├── echarts-wordcloud.js │ ├── echarts-wordcloud.min.js │ ├── echarts.common.min.js │ ├── jquery-3.2.1.min.js │ └── theme │ ├── dark.js │ ├── infographic.js │ ├── macarons.js │ ├── roma.js │ └── vintage.js ├── hupu-spider ├── .gitignore ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── crow │ │ │ ├── HupuSpiderApplication.java │ │ │ ├── domain │ │ │ ├── Comment.java │ │ │ ├── CommentList.java │ │ │ ├── CommentMapper.java │ │ │ ├── HupuBxjPostInfo.java │ │ │ ├── Post.java │ │ │ ├── PostInfoMapper.java │ │ │ ├── PostMapper.java │ │ │ ├── ProxyIp.java │ │ │ ├── ProxyIpMapper.java │ │ │ ├── TitleWord.java │ │ │ ├── TitleWordMapper.java │ │ │ ├── User.java │ │ │ └── UserMapper.java │ │ │ ├── utils │ │ │ ├── IPCheckUtil.java │ │ │ ├── ProxyGeneratedUtil.java │ │ │ ├── URLGeneratedUtil.java │ │ │ └── UserAgentUtil.java │ │ │ ├── web │ │ │ └── StartUpController.java │ │ │ └── webmagic │ │ │ ├── downloader │ │ │ └── CrowProxyProvider.java │ │ │ ├── pageprocessor │ │ │ 
└── HupuBxjPageProcessor.java │ │ │ └── pipeline │ │ │ └── HupuSpiderPipeline.java │ └── resources │ │ ├── application.yml │ │ └── db.sql │ └── test │ └── java │ └── com │ └── crow │ └── HupuspiderApplicationTests.java └── ip-spider ├── .gitignore ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── crow │ │ ├── IpspiderApplication.java │ │ ├── domain │ │ ├── ProxyIp.java │ │ └── ProxyIpMapper.java │ │ ├── utils │ │ └── UserAgentUtil.java │ │ ├── web │ │ └── StartUpController.java │ │ └── webmagic │ │ ├── pageprocessor │ │ ├── ProxyPoolProcessor1.java │ │ └── ProxyPoolProcessor2.java │ │ └── pipeline │ │ └── IPSpiderPipeline.java └── resources │ └── application.yml └── test └── java └── com └── crow ├── DataprocessingApplicationTests.java ├── IpspiderApplicationTests.java └── MagictoeApplicationTests.java /.gitattributes: -------------------------------------------------------------------------------- 1 | *.js linguist-language=Java 2 | *.html linguist-language=Java 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MagicToe 2 | MagicToe是一个基于Java爬虫框架[WebMagic](https://github.com/code4craft/webmagic)的Java爬虫实战案例,MagicToe提供了从获取数据到数据持久化、可视化分析以及构建简单的代理池等一系列完整流程,旨在为初涉Java爬虫的程序员提供一个参考教程和一整套完整的解决方案。 3 | 4 | ## 仓库目录 5 | + [hupu-spider](https://github.com/CrowHawk/MagicToe/tree/master/hupu-spider):爬虫功能实现模块,使用**WebMagic + SpringBoot + MyBatis**基础架构,NLP工具包是[Ansj中文分词](https://github.com/NLPchina/ansj_seg),定制抽取逻辑,将爬取的数据持久化到**MySQL**数据库中,本仓库中的代码示例爬取的是虎扑步行街。 6 | + [data-analysis](https://github.com/CrowHawk/MagicToe/tree/master/data-analysis):数据分析及可视化模块,使用**Spring + SpringMVC + MyBatis**的基础架构,数据可视化采用的前端技术是 **jsp +** [Echarts](http://www.echartsjs.com/)。 7 | + [ip-spider](https://github.com/CrowHawk/MagicToe/tree/master/ip-spider)(可选):爬取代理网站模块,技术选型同hupu-spider,将代理网站上的免费代理地址爬取到本地数据库中,实现一个简单的IP池,以供hupu-spider作为代理使用。 8 | 9 | ## QuickStart 10 | 11
| **爬虫模块环境准备:** 12 | + JDK 1.8+ 13 | + maven 4.0.0+ 14 | + webmagic 0.7.3+ 15 | + ansj_seg 5.1.1+ 16 | + springboot 1.5.7+ 17 | + mybatis 1.3.1+ 18 | + mysql 5.1.21+ 19 | 20 | **运行爬虫:** 21 | 以爬取虎扑步行街的帖子、用户和评论为例。 22 | 1. 初始化数据库 23 | 在本地MySQL中创建自己的schema,执行初始化数据库的脚本 [`hupu-spider/src/main/resources/db.sql`](https://github.com/CrowHawk/MagicToe/blob/master/hupu-spider/src/main/resources/db.sql) ,并根据自己的数据库信息修改配置文件 [`hupu-spider/src/main/resources/application.yml`](https://github.com/CrowHawk/MagicToe/blob/master/hupu-spider/src/main/resources/application.yml) 中的数据源信息。 24 | 2. 启动爬虫 25 | hupuspider通过URL请求的方式运行,在浏览器中键入 **localhost:8080/**(默认端口为8080,如果遇到端口冲突,可以在配置文件 [`hupu-spider/src/main/resources/application.yml`](https://github.com/CrowHawk/MagicToe/blob/master/hupu-spider/src/main/resources/application.yml) 中修改端口),爬虫即可开始运行了。 26 | 3. 运行数据可视化模块 27 | 将数据爬取到数据库中后,直接在Tomcat中运行[data-analysis](https://github.com/CrowHawk/MagicToe/tree/master/data-analysis)模块即可,通过在浏览器中输入不同的URL可以得到不同的图表,具体请查看 [`data-analysis/src/main/java/com/crow/web/EchartsController.java`](https://github.com/CrowHawk/MagicToe/blob/master/data-analysis/src/main/java/com/crow/web/EchartsController.java) 。 28 | 29 | ## 效果展示 30 | 以虎扑用户的地域分布为例: 31 | 32 |
33 | 34 | 更多详细的分析请参考我的博客[《数据不说谎:用网络爬虫探秘虎扑步行街》](https://crowhawk.github.io/2017/10/25/hupuspider/)。 35 | 36 | ## TODO 37 | * [ ] 使用Redis分布式队列实现分布式爬取。 38 | * [ ] 使用Quartz实现定时更新数据。 39 | 40 | ## 联系作者 41 | + Personal Website:[Crow Home](https://crowhawk.github.io/) 42 | + 知乎:[Martin Crow](https://www.zhihu.com/people/martin-crow/activities) 43 | 44 | -------------------------------------------------------------------------------- /data-analysis/data-analysis.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /data-analysis/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.crow 5 | data-analysis 6 | war 7 | 1.0-SNAPSHOT 8 | data-analysis Maven Webapp 9 | http://maven.apache.org 10 | 11 | UTF-8 12 | 13 | 4.2.4.RELEASE 14 | 1.8 15 | 1.8 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | javax.servlet 24 | javax.servlet-api 25 | 3.1.0 26 | 27 | 28 | javax.servlet.jsp 29 | jsp-api 30 | 2.2 31 | 32 | 33 | javax.servlet 34 | jstl 35 | 1.2 36 | 37 | 38 | 39 | org.springframework 40 | spring-webmvc 41 | ${spring.version} 42 | 43 | 44 | 45 | org.springframework 46 | spring-core 47 | ${spring.version} 48 | 49 | 50 | 51 | org.springframework 52 | spring-orm 53 | ${spring.version} 54 | 55 | 56 | 57 | org.springframework 58 | spring-aspects 59 | ${spring.version} 60 | 61 | 62 | 63 | org.springframework 64 | spring-test 65 | ${spring.version} 66 | 67 | 68 | 69 | org.springframework 70 | spring-jdbc 71 | ${spring.version} 72 | 73 | 74 | 75 | 76 | org.mybatis 77 | mybatis 78 | 3.3.1 79 | 80 | 81 | org.mybatis 82 | 
mybatis-spring 83 | 1.2.4 84 | 85 | 86 | 87 | mysql 88 | mysql-connector-java 89 | 5.1.38 90 | 91 | 92 | 93 | commons-dbcp 94 | commons-dbcp 95 | 1.4 96 | 97 | 98 | 99 | log4j 100 | log4j 101 | 1.2.17 102 | 103 | 104 | 105 | org.slf4j 106 | slf4j-api 107 | 1.7.18 108 | 109 | 110 | 111 | 112 | javax.servlet 113 | jstl 114 | 1.2 115 | 116 | 117 | taglibs 118 | standard 119 | 1.1.2 120 | 121 | 122 | 123 | 124 | org.hibernate 125 | hibernate-validator 126 | 5.2.4.Final 127 | 128 | 129 | 130 | 131 | commons-fileupload 132 | commons-fileupload 133 | 1.3.1 134 | 135 | 136 | 137 | 138 | com.fasterxml.jackson.core 139 | jackson-core 140 | 2.4.3 141 | 142 | 143 | com.fasterxml.jackson.core 144 | jackson-databind 145 | 2.4.3 146 | 147 | 148 | 149 | 150 | org.ansj 151 | ansj_seg 152 | 5.1.1 153 | 154 | 155 | 156 | 157 | data-analysis 158 | 159 | 160 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/AuxiliaryModels/NameValue.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain.AuxiliaryModels; 2 | 3 | /** 4 | * Created by CrowHawk on 17/10/23. 
5 | */ 6 | 7 | /** 8 | * Name/value pair used to assemble the JSON data returned to the front end. 9 | */ 10 | public class NameValue { 11 | private Integer value; 12 | private String name; 13 | 14 | public NameValue(Integer value, String name) { 15 | this.value = value; 16 | this.name = name; 17 | } 18 | 19 | public Integer getValue() { 20 | return value; 21 | } 22 | 23 | public void setValue(Integer value) { 24 | this.value = value; 25 | } 26 | 27 | public String getName() { 28 | return name; 29 | } 30 | 31 | public void setName(String name) { 32 | this.name = name; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/Comment.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | /** 4 | * Created by CrowHawk on 17/10/11. Bean with id, litNum, author, content and title fields and their accessors. 5 | */ 6 | public class Comment { 7 | 8 | private int id; 9 | private int litNum;// the comment's "lit" count (presumably the site's upvote counter - confirm) 10 | private String author; 11 | private String content; 12 | private String title; 13 | 14 | public int getId() { 15 | return id; 16 | } 17 | 18 | public void setId(int id) { 19 | this.id = id; 20 | } 21 | 22 | public int getLitNum() { 23 | return litNum; 24 | } 25 | 26 | public void setLitNum(int litNum) { 27 | this.litNum = litNum; 28 | } 29 | 30 | public String getAuthor() { 31 | return author; 32 | } 33 | 34 | public void setAuthor(String author) { 35 | this.author = author; 36 | } 37 | 38 | public String getContent() { 39 | return content; 40 | } 41 | 42 | public void setContent(String content) { 43 | this.content = content; 44 | } 45 | 46 | public String getTitle() { 47 | return title; 48 | } 49 | 50 | public void setTitle(String title) { 51 | this.title = title; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/CommentMapper.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | import
org.apache.ibatis.annotations.Insert; 4 | import org.apache.ibatis.annotations.Result; 5 | import org.apache.ibatis.annotations.Results; 6 | import org.apache.ibatis.annotations.Select; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * Created by CrowHawk on 17/10/12. MyBatis annotation-based mapper for the comment table. 12 | */ 13 | 14 | public interface CommentMapper { 15 | /** Inserts one row into comment from the content, author, litNum and title properties of the argument. */ @Insert("insert into comment (`content`,`author`,`lit_num`,`title`) values(#{content},#{author},#{litNum},#{title})") 16 | void insert(Comment comment); 17 | 18 | /** Selects rows from comment ordered by lit_num descending, limited to selectLimitNum rows; columns are mapped through the commentResult result map. NOTE(review): the return type is a raw List - the element type is presumably Comment, confirm against the original file. */ @Results(id = "commentResult", value = { 19 | @Result(property = "id", column = "id", id = true), 20 | @Result(property = "litNum", column = "lit_num"), 21 | @Result(property = "author", column = "author"), 22 | @Result(property = "content", column = "content"), 23 | @Result(property = "title", column = "title") 24 | }) 25 | @Select("select * from comment order by lit_num desc limit #{selectLimitNum}") 26 | List selectAllCommentsSorted(Integer selectLimitNum); 27 | } 28 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/CommentWord.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | /** 4 | * Created by CrowHawk on 17/10/24.
5 | */ 6 | /** Bean for one segmented comment word together with its occurrence count. */ public class CommentWord { 7 | private Integer id; 8 | private String word;// text of the segmented word 9 | private Integer wordCount;// number of times the segmented word occurs 10 | 11 | /* 12 | public CommentWord(String word) { 13 | this.word = word; 14 | } 15 | */ 16 | 17 | public Integer getId() { 18 | return id; 19 | } 20 | 21 | public void setId(Integer id) { 22 | this.id = id; 23 | } 24 | 25 | public String getWord() { 26 | return word; 27 | } 28 | 29 | public void setWord(String word) { 30 | this.word = word; 31 | } 32 | 33 | public Integer getWordCount() { 34 | return wordCount; 35 | } 36 | 37 | public void setWordCount(Integer wordCount) { 38 | this.wordCount = wordCount; 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/CommentWordMapper.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | import java.util.List; 4 | 5 | /** 6 | * Created by CrowHawk on 17/10/24. Mapper interface for comment words; the only active method carries no SQL annotation here. 7 | */ 8 | public interface CommentWordMapper { 9 | /* 10 | @Insert("insert into comment_word (`word`) values (#{word})") 11 | void insert(CommentWord commentWord); 12 | */ 13 | // Fetch all title word segments, sorted by frequency of occurrence. NOTE(review): the original comment says "title"; presumably copied from TitleWordMapper, since this mapper is for comment words. The return type is a raw List, presumably of CommentWord - confirm. 14 | List selectWordsSorted(Integer selectLimitNum); 15 | } 16 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/Post.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | /** 4 | * Created by CrowHawk on 17/10/11.
5 | */ 6 | /** Bean with id, title, author and replyNum fields and their accessors. */ public class Post { 7 | 8 | private int id; 9 | private String title; 10 | private String author; 11 | /** Reply count of the post. */ private int replyNum; 12 | 13 | public int getId() { 14 | return id; 15 | } 16 | 17 | public void setId(int id) { 18 | this.id = id; 19 | } 20 | 21 | public String getTitle() { 22 | return title; 23 | } 24 | 25 | public void setTitle(String title) { 26 | this.title = title; 27 | } 28 | 29 | public String getAuthor() { 30 | return author; 31 | } 32 | 33 | public void setAuthor(String author) { 34 | this.author = author; 35 | } 36 | 37 | public int getReplyNum() { 38 | return replyNum; 39 | } 40 | 41 | public void setReplyNum(int replyNum) { 42 | this.replyNum = replyNum; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/PostMapper.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | import org.apache.ibatis.annotations.Insert; 4 | import org.apache.ibatis.annotations.Result; 5 | import org.apache.ibatis.annotations.Results; 6 | import org.apache.ibatis.annotations.Select; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * Created by CrowHawk on 17/10/12.
12 | */ 13 | 14 | /** MyBatis annotation-based mapper for the post table. */ public interface PostMapper { 15 | /** Inserts one row into post from the title, author and replyNum properties, using "insert ignore". */ @Insert("insert ignore into post (`title`,`author`,`reply_num`) values (#{title},#{author},#{replyNum})") 16 | void insert(Post post); 17 | 18 | /** Selects rows from post ordered by reply_num descending, limited to selectLimitNum rows; columns are mapped through the postResult result map. NOTE(review): the return type is a raw List - the element type is presumably Post, confirm. */ @Results(id = "postResult", value = { 19 | @Result(property = "id", column = "id", id = true), 20 | @Result(property = "title", column = "title"), 21 | @Result(property = "author", column = "author"), 22 | @Result(property = "replyNum", column = "reply_num") 23 | }) 24 | @Select("select * from post order by reply_num desc limit #{selectLimitNum}") 25 | List selectAllPostsSorted(Integer selectLimitNum); 26 | } 27 | 28 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/TitleWord.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | /** 4 | * Created by CrowHawk on 17/10/11. 5 | */ 6 | 7 | /** 8 | * Word-segmentation result of a post title. 9 | */ 10 | public class TitleWord { 11 | private Integer id; 12 | private String word;// text of the segmented word 13 | 14 | private Integer wordCount;// number of times the segmented word occurs 15 | 16 | public Integer getId() { 17 | return id; 18 | } 19 | 20 | public void setId(Integer id) { 21 | this.id = id; 22 | } 23 | 24 | public String getWord() { 25 | return word; 26 | } 27 | 28 | public void setWord(String word) { 29 | this.word = word; 30 | } 31 | 32 | public Integer getWordCount() { 33 | return wordCount; 34 | } 35 | 36 | public void setWordCount(Integer wordCount) { 37 | this.wordCount = wordCount; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/TitleWordMapper.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | import org.apache.ibatis.annotations.Insert; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * Created by CrowHawk on 17/10/12.
9 | */ 10 | 11 | /** Mapper for the title_word table: an annotated insert plus a sorted select that carries no SQL annotation here. */ public interface TitleWordMapper { 12 | /** Inserts one row into title_word from the word property of the argument. */ @Insert("insert into title_word (`word`) values (#{word})") 13 | void insert(TitleWord titleWord); 14 | // Fetch all title word segments, sorted by frequency of occurrence. NOTE(review): raw List return type, presumably of TitleWord - confirm. 15 | List selectWordsSorted(Integer selectLimitNum); 16 | } 17 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/User.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | /** 4 | * Created by CrowHawk on 17/10/11. Bean for a user record; besides the per-user fields it also declares count fields (peopleNum, maleNum, femaleNum, unknownNum, genderNum). 5 | */ 6 | public class User { 7 | 8 | private int id; 9 | private String name; 10 | private String gender; 11 | private String homeTeam;// the user's home team 12 | private String address;// the user's location 13 | private Integer views;// number of visits to the user's home page 14 | 15 | private String provinceAddress;// province name 16 | 17 | private Integer peopleNum;// number of people per province 18 | 19 | private Integer maleNum;// number of male users 20 | 21 | private Integer femaleNum;// number of female users 22 | 23 | private Integer unknownNum;// number of users who did not fill in a gender 24 | 25 | /* count paired with gender when EchartsController.getGender builds its name/value list */ private Integer genderNum; 26 | 27 | public int getId() { 28 | return id; 29 | } 30 | 31 | public void setId(int id) { 32 | this.id = id; 33 | } 34 | 35 | public String getName() { 36 | return name; 37 | } 38 | 39 | public void setName(String name) { 40 | this.name = name; 41 | } 42 | 43 | public String getGender() { 44 | return gender; 45 | } 46 | 47 | public void setGender(String gender) { 48 | this.gender = gender; 49 | } 50 | 51 | public String getHomeTeam() { 52 | return homeTeam; 53 | } 54 | 55 | public void setHomeTeam(String homeTeam) { 56 | this.homeTeam = homeTeam; 57 | } 58 | 59 | public String getAddress() { 60 | return address; 61 | } 62 | 63 | public void setAddress(String address) { 64 | this.address = address; 65 | } 66 | 67 | public Integer getViews() { 68 | return views; 69 | } 70 | 71 | public void setViews(Integer views) { 72 | this.views = views; 73 | } 74 | 75 | public String getProvinceAddress() { 76 | return provinceAddress; 77 | } 78 | 79 | public void
setProvinceAddress(String provinceAddress) { 80 | this.provinceAddress = provinceAddress; 81 | } 82 | 83 | public Integer getPeopleNum() { 84 | return peopleNum; 85 | } 86 | 87 | public void setPeopleNum(Integer peopleNum) { 88 | this.peopleNum = peopleNum; 89 | } 90 | 91 | public Integer getGenderNum() { 92 | return genderNum; 93 | } 94 | 95 | public void setGenderNum(Integer genderNum) { 96 | this.genderNum = genderNum; 97 | } 98 | 99 | public Integer getMaleNum() { 100 | return maleNum; 101 | } 102 | 103 | public void setMaleNum(Integer maleNum) { 104 | this.maleNum = maleNum; 105 | } 106 | 107 | public Integer getFemaleNum() { 108 | return femaleNum; 109 | } 110 | 111 | public void setFemaleNum(Integer femaleNum) { 112 | this.femaleNum = femaleNum; 113 | } 114 | 115 | public Integer getUnknownNum() { 116 | return unknownNum; 117 | } 118 | 119 | public void setUnknownNum(Integer unknownNum) { 120 | this.unknownNum = unknownNum; 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/domain/UserMapper.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | import org.apache.ibatis.annotations.Insert; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * Created by CrowHawk on 17/10/12.
9 | */ 10 | 11 | /** Mapper for the user table: an annotated insert plus three select methods that carry no SQL annotation here. NOTE(review): all three selects return a raw List, presumably of User - confirm. */ public interface UserMapper { 12 | 13 | /** Inserts one row into user from the name, gender, homeTeam and address properties, using "insert ignore". */ @Insert("insert ignore into user (`name`,`gender`,`home_team`,`address`) values (#{name},#{gender},#{homeTeam},#{address})") 14 | void insert(User user); 15 | // Get the geographic distribution of users 16 | List selectAllAddressesSort(Integer selectLimitNum); 17 | // Get the gender distribution of users 18 | List selectAllGender(); 19 | // Get all users, sorted by view count 20 | List selectAllUsersSortedByViews(Integer selectLimitNum); 21 | } 22 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/service/CommentService.java: -------------------------------------------------------------------------------- 1 | package com.crow.service; 2 | 3 | import com.crow.domain.Comment; 4 | import com.crow.domain.CommentMapper; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Service; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * Created by CrowHawk on 17/10/21. Service that delegates comment queries to CommentMapper. 12 | */ 13 | @Service 14 | public class CommentService { 15 | @Autowired 16 | CommentMapper commentMapper; 17 | 18 | /** Returns the result of CommentMapper.selectAllCommentsSorted for the given row limit. */ public List getAllComments(Integer selectLimitNum) { 19 | return commentMapper.selectAllCommentsSorted(selectLimitNum); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/service/CommentWordService.java: -------------------------------------------------------------------------------- 1 | package com.crow.service; 2 | 3 | import com.crow.domain.CommentMapper; 4 | import com.crow.domain.CommentWord; 5 | import com.crow.domain.CommentWordMapper; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.stereotype.Service; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | /** 13 | * Created by CrowHawk on 17/10/24.
14 | */ 15 | /** Service that loads comment words through CommentWordMapper and filters them by a word/tag pattern. */ @Service 16 | public class CommentWordService { 17 | @Autowired 18 | CommentWordMapper commentWordMapper; 19 | 20 | /* NOTE(review): in the visible code this mapper is only referenced from the commented-out insertAllWords below. */ @Autowired 21 | CommentMapper commentMapper; 22 | 23 | /** 24 | * Generate the comment word segments 25 | * from the comment contents stored in the database. 26 | */ 27 | /* 28 | public void insertAllWords() { 29 | List commentList = commentMapper.selectAllCommentsSorted(100000); 30 | for(Comment comment: commentList) { 31 | String content = comment.getContent().replaceAll( "[\\p{P}+~$`^=|<>~`$^+=|<>¥×]" , ""); 32 | String[] strings = ToAnalysis.parse(content).toString().split(",");//the segmentation result is separated by "," 33 | for(String word: strings) { 34 | commentWordMapper.insert(new CommentWord(word)); 35 | } 36 | } 37 | } 38 | */ 39 | 40 | /** Fetches up to selectLimitNum words via selectWordsSorted and returns only those whose word matches the pattern below: one or more characters in the range u4e00-u9fa5, then "/", then one of the tags n, a, vn, ad, b, t. NOTE(review): raw List types here; the for-each over CommentWord presumably relied on a generic parameter that is missing in this dump. */ public List getAllWords(Integer selectLimitNum) { 41 | List commentWords = commentWordMapper.selectWordsSorted(selectLimitNum); 42 | List commentWordList = new ArrayList<>(); 43 | for(CommentWord commentWord: commentWords) { 44 | if(commentWord.getWord().matches("[\\u4e00-\\u9fa5]+/(n|a|vn|ad|b|t)")) {// drop particles, modal words, etc. from the segmentation results 45 | commentWordList.add(commentWord); 46 | } 47 | } 48 | return commentWordList; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/service/PostService.java: -------------------------------------------------------------------------------- 1 | package com.crow.service; 2 | 3 | import com.crow.domain.Post; 4 | import com.crow.domain.PostMapper; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Service; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * Created by CrowHawk on 17/10/21.
12 | */ 13 | /** Service that delegates post queries to PostMapper. */ @Service 14 | public class PostService { 15 | @Autowired 16 | PostMapper postMapper; 17 | 18 | /** Returns the result of PostMapper.selectAllPostsSorted for the given row limit. */ public List getAllPosts(Integer selectLimitNum) { 19 | return postMapper.selectAllPostsSorted(selectLimitNum); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/service/TitleWordService.java: -------------------------------------------------------------------------------- 1 | package com.crow.service; 2 | 3 | import com.crow.domain.TitleWord; 4 | import com.crow.domain.TitleWordMapper; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Service; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * Created by CrowHawk on 17/10/21. Service that delegates title-word queries to TitleWordMapper. 12 | */ 13 | @Service 14 | public class TitleWordService { 15 | @Autowired 16 | TitleWordMapper titleWordMapper; 17 | 18 | /** Returns the result of TitleWordMapper.selectWordsSorted for the given row limit. */ public List getAllWords(Integer selectLimitNum) { 19 | return titleWordMapper.selectWordsSorted(selectLimitNum); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/service/UserService.java: -------------------------------------------------------------------------------- 1 | package com.crow.service; 2 | 3 | import com.crow.domain.User; 4 | import com.crow.domain.UserMapper; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Service; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * Created by CrowHawk on 17/10/21.
12 | */ 13 | /** Service that delegates user queries to UserMapper. */ @Service 14 | public class UserService { 15 | @Autowired 16 | UserMapper userMapper; 17 | 18 | /** Returns the result of UserMapper.selectAllAddressesSort for the given row limit. */ public List getAddresses(Integer selectLimitNum) { 19 | return userMapper.selectAllAddressesSort(selectLimitNum); 20 | } 21 | 22 | /** Returns the result of UserMapper.selectAllGender. */ public List getGender() { 23 | return userMapper.selectAllGender(); 24 | } 25 | 26 | /** Returns the result of UserMapper.selectAllUsersSortedByViews for the given row limit. */ public List getAllUsers(Integer selectLimitNum) { 27 | return userMapper.selectAllUsersSortedByViews(selectLimitNum); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /data-analysis/src/main/java/com/crow/web/EchartsController.java: -------------------------------------------------------------------------------- 1 | package com.crow.web; 2 | 3 | import com.crow.domain.AuxiliaryModels.NameValue; 4 | import com.crow.domain.CommentWord; 5 | import com.crow.domain.Post; 6 | import com.crow.domain.TitleWord; 7 | import com.crow.domain.User; 8 | import com.crow.service.*; 9 | import org.springframework.beans.factory.annotation.Autowired; 10 | import org.springframework.stereotype.Controller; 11 | import org.springframework.web.bind.annotation.RequestMapping; 12 | import org.springframework.web.bind.annotation.ResponseBody; 13 | 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | 17 | /** 18 | * Created by CrowHawk on 17/10/21.
*/
@Controller
public class EchartsController {
    @Autowired
    CommentService commentService;
    @Autowired
    PostService postService;
    @Autowired
    TitleWordService titleWordService;
    @Autowired
    UserService userService;
    @Autowired
    CommentWordService commentWordService;

    /**
     * Returns the result of {@code userService.getAddresses(18)} as the response body.
     *
     * @return the list produced by the service, unmodified
     */
    @RequestMapping(value = "/getAddress")
    @ResponseBody
    public List<User> getAddress() {
        List<User> users = userService.getAddresses(18);
        return users;
    }

    /**
     * Converts every row from {@code userService.getGender()} into a
     * {@link NameValue} built from the row's gender count and gender.
     *
     * @return one {@link NameValue} per row, in service order
     */
    @RequestMapping(value = "/getGender")
    @ResponseBody
    public List<NameValue> getGender() {
        List<User> users = userService.getGender();
        List<NameValue> result = new ArrayList<>();
        for (User user : users) {
            result.add(new NameValue(user.getGenderNum(), user.getGender()));
        }
        return result;
    }

    /**
     * Converts every entry from {@code titleWordService.getAllWords(30)} into a
     * {@link NameValue} built from the word count and the word.
     *
     * @return one {@link NameValue} per title word, in service order
     */
    @RequestMapping(value = "/getTitleWord")
    @ResponseBody
    public List<NameValue> getTitleWord() {
        List<TitleWord> titleWords = titleWordService.getAllWords(30);
        List<NameValue> result = new ArrayList<>();
        for (TitleWord titleWord : titleWords) {
            result.add(new NameValue(titleWord.getWordCount(), titleWord.getWord()));
        }
        return result;
    }

    /**
     * Converts every post from {@code postService.getAllPosts(20)} into a
     * {@link NameValue} built from the reply count and the title.
     *
     * @return one {@link NameValue} per post, in service order
     */
    @RequestMapping(value = "/getTitle")
    @ResponseBody
    public List<NameValue> getTitle() {
        List<Post> posts = postService.getAllPosts(20);
        List<NameValue> result = new ArrayList<>();
        for (Post post : posts) {
            result.add(new NameValue(post.getReplyNum(), post.getTitle()));
        }
        return result;
    }

    /**
     * Returns the result of {@code userService.getAllUsers(10)} as the response body.
     *
     * @return the list produced by the service, unmodified
     */
    @RequestMapping(value = "/getViews")
    @ResponseBody
    public List<User> getViews() {
        List<User> users = userService.getAllUsers(10);
        return users;
    }

    /**
     * Converts every entry from {@code commentWordService.getAllWords(300)} into a
     * {@link NameValue} built from the word count and the word text before the
     * first {@code '/'}.
     *
     * @return one {@link NameValue} per comment word, in service order
     */
    @RequestMapping(value = "/getCommentWord")
    @ResponseBody
    public List<NameValue> getCommentWord() {
        List<CommentWord> commentWords = commentWordService.getAllWords(300);
        List<NameValue> result = new ArrayList<>();
        for (CommentWord commentWord : commentWords) {
            String word = commentWord.getWord();
            // Drop the part-of-speech tag that follows '/' in a segmented word.
            // A word without a tag is kept whole: substring(0, -1) would throw
            // StringIndexOutOfBoundsException and fail the whole request.
            int tagStart = word.indexOf('/');
            String text = tagStart >= 0 ? word.substring(0, tagStart) : word;
            result.add(new NameValue(commentWord.getWordCount(), text));
        }
        return result;
    }

    /*
    @RequestMapping(value = "/genCommentWord")
    public void genCommentWord() {
        commentWordService.insertAllWords();
    }
    */

    /** Returns the view name {@code "ProvinceAddress"} for {@code /province}. */
    @RequestMapping(value = "/province")
    public String getAddressEcharts() {
        return "ProvinceAddress";
    }

    /** Returns the view name {@code "Gender"} for {@code /gender}. */
    @RequestMapping(value = "/gender")
    public String getGenderEcharts() {
        return "Gender";
    }

    /** Returns the view name {@code "Views"} for {@code /views}. */
    @RequestMapping(value = "/views")
    public String getViewsEcharts() {
        return "Views";
    }

    /** Returns the view name {@code "TitleWords"} for {@code /titleword}. */
    @RequestMapping(value = "/titleword")
    public String getTitleWordEcharts() {
        return "TitleWords";
    }

    /** Returns the view name {@code "Titles"} for {@code /title}. */
    @RequestMapping(value = "/title")
    public String getTitleEcharts() {
        return "Titles";
    }

    /** Returns the view name {@code "CommentWords"} for {@code /commentword}. */
    @RequestMapping(value = "/commentword")
    public String getCommentWordEcharts() {
        return "CommentWords";
    }
}

--------------------------------------------------------------------------------
/data-analysis/src/main/resources/com/crow/domain/CommentWordMapper.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | comment_word 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 23 |
--------------------------------------------------------------------------------
/data-analysis/src/main/resources/com/crow/domain/TitleWordMapper.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | title_word 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 23 |
--------------------------------------------------------------------------------
/data-analysis/src/main/resources/com/crow/domain/UserMapper.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | USER 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 37 | 38 | 47 | 48 |
56 | 57 | -------------------------------------------------------------------------------- /data-analysis/src/main/resources/db.properties: -------------------------------------------------------------------------------- 1 | jdbc.driver=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/HupuSpider?characterEncoding=utf-8 3 | jdbc.username=root 4 | jdbc.password=wyj 5 | -------------------------------------------------------------------------------- /data-analysis/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Global logging configuration 2 | # 在开发环境下日志级别设置成DEBUG,在生产环境下才设置为ERROR和INFO 3 | log4j.rootLogger=DEBUG, stdout 4 | # MyBatis logging configuration... 5 | log4j.logger.org.mybatis.example.BlogMapper=TRACE 6 | # Console output... 7 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 8 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.stdout.layout.ConversionPattern=%5p [%t] - %m%n 10 | -------------------------------------------------------------------------------- /data-analysis/src/main/resources/mybatis/sqlMapConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 23 | -------------------------------------------------------------------------------- /data-analysis/src/main/resources/spring/applicationContext-dao.xml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /data-analysis/src/main/resources/spring/applicationContext-service.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 
-------------------------------------------------------------------------------- /data-analysis/src/main/resources/spring/applicationContext-transaction.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 13 | 14 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /data-analysis/src/main/resources/spring/springmvc.xml: -------------------------------------------------------------------------------- 1 | 15 | 16 | 20 | 21 | 22 | 25 | 26 | 27 | 28 | 29 | 32 | 33 | 36 | 37 | 38 | 43 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 57 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/WEB-INF/jsp/CommentWords.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/24 5 | Time: 上午11:12 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 25 |
26 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/WEB-INF/jsp/Gender.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/23 5 | Time: 下午6:19 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 130 | 131 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/WEB-INF/jsp/ProvinceAddress.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/22 5 | Time: 下午10:21 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | 9 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 10 | 11 | 12 | ECharts 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 124 | 125 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/WEB-INF/jsp/TitleWords.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/23 5 | Time: 下午9:39 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 25 |
26 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/WEB-INF/jsp/Titles.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/23 5 | Time: 下午11:00 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 25 |
26 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/WEB-INF/jsp/Views.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/23 5 | Time: 下午12:49 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | 9 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 10 | 11 | 12 | ECharts 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 95 | 96 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | Archetype Created Web Application 6 | 7 | 8 | contextConfigLocation 9 | classpath:spring/applicationContext-*.xml 10 | 11 | 12 | 13 | org.springframework.web.context.ContextLoaderListener 14 | 15 | 16 | 17 | 18 | springmvc 19 | org.springframework.web.servlet.DispatcherServlet 20 | 23 | 24 | contextConfigLocation 25 | classpath:spring/springmvc.xml 26 | 27 | 28 | 29 | 30 | springmvc 31 | 38 | *.action 39 | 40 | 41 | 42 | 43 | springmvc_rest 44 | org.springframework.web.servlet.DispatcherServlet 45 | 46 | 47 | contextConfigLocation 48 | classpath:spring/springmvc.xml 49 | 50 | 51 | 52 | 53 | springmvc_rest 54 | / 55 | 56 | 57 | 58 | 59 | CharacterEncodingFilter 60 | org.springframework.web.filter.CharacterEncodingFilter 61 | 62 | encoding 63 | utf-8 64 | 65 | 66 | 67 | CharacterEncodingFilter 68 | /* 69 | 70 | 71 | 72 | index.html 73 | index.htm 74 | index.jsp 75 | default.html 76 | default.htm 77 | default.jsp 78 | 79 | 80 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/index.jsp: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Hello World!

4 | 5 | 6 | -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/js/theme/dark.js: -------------------------------------------------------------------------------- 1 | (function (root, factory) { 2 | if (typeof define === 'function' && define.amd) { 3 | // AMD. Register as an anonymous module. 4 | define(['exports', 'echarts'], factory); 5 | } else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') { 6 | // CommonJS 7 | factory(exports, require('echarts')); 8 | } else { 9 | // Browser globals 10 | factory({}, root.echarts); 11 | } 12 | }(this, function (exports, echarts) { 13 | var log = function (msg) { 14 | if (typeof console !== 'undefined') { 15 | console && console.error && console.error(msg); 16 | } 17 | }; 18 | if (!echarts) { 19 | log('ECharts is not Loaded'); 20 | return; 21 | } 22 | var contrastColor = '#eee'; 23 | var axisCommon = function () { 24 | return { 25 | axisLine: { 26 | lineStyle: { 27 | color: contrastColor 28 | } 29 | }, 30 | axisTick: { 31 | lineStyle: { 32 | color: contrastColor 33 | } 34 | }, 35 | axisLabel: { 36 | textStyle: { 37 | color: contrastColor 38 | } 39 | }, 40 | splitLine: { 41 | lineStyle: { 42 | type: 'dashed', 43 | color: '#aaa' 44 | } 45 | }, 46 | splitArea: { 47 | areaStyle: { 48 | color: contrastColor 49 | } 50 | } 51 | }; 52 | }; 53 | 54 | var colorPalette = ['#dd6b66','#759aa0','#e69d87','#8dc1a9','#ea7e53','#eedd78','#73a373','#73b9bc','#7289ab', '#91ca8c','#f49f42']; 55 | var theme = { 56 | color: colorPalette, 57 | backgroundColor: '#333', 58 | tooltip: { 59 | axisPointer: { 60 | lineStyle: { 61 | color: contrastColor 62 | }, 63 | crossStyle: { 64 | color: contrastColor 65 | } 66 | } 67 | }, 68 | legend: { 69 | textStyle: { 70 | color: contrastColor 71 | } 72 | }, 73 | textStyle: { 74 | color: contrastColor 75 | }, 76 | title: { 77 | textStyle: { 78 | color: contrastColor 79 | } 80 | }, 81 | toolbox: { 82 | iconStyle: { 83 | normal: { 84 
| borderColor: contrastColor 85 | } 86 | } 87 | }, 88 | dataZoom: { 89 | textStyle: { 90 | color: contrastColor 91 | } 92 | }, 93 | timeline: { 94 | lineStyle: { 95 | color: contrastColor 96 | }, 97 | itemStyle: { 98 | normal: { 99 | color: colorPalette[1] 100 | } 101 | }, 102 | label: { 103 | normal: { 104 | textStyle: { 105 | color: contrastColor 106 | } 107 | } 108 | }, 109 | controlStyle: { 110 | normal: { 111 | color: contrastColor, 112 | borderColor: contrastColor 113 | } 114 | } 115 | }, 116 | timeAxis: axisCommon(), 117 | logAxis: axisCommon(), 118 | valueAxis: axisCommon(), 119 | categoryAxis: axisCommon(), 120 | 121 | line: { 122 | symbol: 'circle' 123 | }, 124 | graph: { 125 | color: colorPalette 126 | }, 127 | gauge: { 128 | title: { 129 | textStyle: { 130 | color: contrastColor 131 | } 132 | } 133 | }, 134 | candlestick: { 135 | itemStyle: { 136 | normal: { 137 | color: '#FD1050', 138 | color0: '#0CF49B', 139 | borderColor: '#FD1050', 140 | borderColor0: '#0CF49B' 141 | } 142 | } 143 | } 144 | }; 145 | theme.categoryAxis.splitLine.show = false; 146 | echarts.registerTheme('dark', theme); 147 | })); -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/js/theme/macarons.js: -------------------------------------------------------------------------------- 1 | (function (root, factory) { 2 | if (typeof define === 'function' && define.amd) { 3 | // AMD. Register as an anonymous module. 
4 | define(['exports', 'echarts'], factory); 5 | } else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') { 6 | // CommonJS 7 | factory(exports, require('echarts')); 8 | } else { 9 | // Browser globals 10 | factory({}, root.echarts); 11 | } 12 | }(this, function (exports, echarts) { 13 | var log = function (msg) { 14 | if (typeof console !== 'undefined') { 15 | console && console.error && console.error(msg); 16 | } 17 | }; 18 | if (!echarts) { 19 | log('ECharts is not Loaded'); 20 | return; 21 | } 22 | 23 | var colorPalette = [ 24 | '#2ec7c9','#b6a2de','#5ab1ef','#ffb980','#d87a80', 25 | '#8d98b3','#e5cf0d','#97b552','#95706d','#dc69aa', 26 | '#07a2a4','#9a7fd1','#588dd5','#f5994e','#c05050', 27 | '#59678c','#c9ab00','#7eb00a','#6f5553','#c14089' 28 | ]; 29 | 30 | 31 | var theme = { 32 | color: colorPalette, 33 | 34 | title: { 35 | textStyle: { 36 | fontWeight: 'normal', 37 | color: '#008acd' 38 | } 39 | }, 40 | 41 | visualMap: { 42 | itemWidth: 15, 43 | color: ['#5ab1ef','#e0ffff'] 44 | }, 45 | 46 | toolbox: { 47 | iconStyle: { 48 | normal: { 49 | borderColor: colorPalette[0] 50 | } 51 | } 52 | }, 53 | 54 | tooltip: { 55 | backgroundColor: 'rgba(50,50,50,0.5)', 56 | axisPointer : { 57 | type : 'line', 58 | lineStyle : { 59 | color: '#008acd' 60 | }, 61 | crossStyle: { 62 | color: '#008acd' 63 | }, 64 | shadowStyle : { 65 | color: 'rgba(200,200,200,0.2)' 66 | } 67 | } 68 | }, 69 | 70 | dataZoom: { 71 | dataBackgroundColor: '#efefff', 72 | fillerColor: 'rgba(182,162,222,0.2)', 73 | handleColor: '#008acd' 74 | }, 75 | 76 | grid: { 77 | borderColor: '#eee' 78 | }, 79 | 80 | categoryAxis: { 81 | axisLine: { 82 | lineStyle: { 83 | color: '#008acd' 84 | } 85 | }, 86 | splitLine: { 87 | lineStyle: { 88 | color: ['#eee'] 89 | } 90 | } 91 | }, 92 | 93 | valueAxis: { 94 | axisLine: { 95 | lineStyle: { 96 | color: '#008acd' 97 | } 98 | }, 99 | splitArea : { 100 | show : true, 101 | areaStyle : { 102 | color: 
['rgba(250,250,250,0.1)','rgba(200,200,200,0.1)'] 103 | } 104 | }, 105 | splitLine: { 106 | lineStyle: { 107 | color: ['#eee'] 108 | } 109 | } 110 | }, 111 | 112 | timeline : { 113 | lineStyle : { 114 | color : '#008acd' 115 | }, 116 | controlStyle : { 117 | normal : { color : '#008acd'}, 118 | emphasis : { color : '#008acd'} 119 | }, 120 | symbol : 'emptyCircle', 121 | symbolSize : 3 122 | }, 123 | 124 | line: { 125 | smooth : true, 126 | symbol: 'emptyCircle', 127 | symbolSize: 3 128 | }, 129 | 130 | candlestick: { 131 | itemStyle: { 132 | normal: { 133 | color: '#d87a80', 134 | color0: '#2ec7c9', 135 | lineStyle: { 136 | color: '#d87a80', 137 | color0: '#2ec7c9' 138 | } 139 | } 140 | } 141 | }, 142 | 143 | scatter: { 144 | symbol: 'circle', 145 | symbolSize: 4 146 | }, 147 | 148 | map: { 149 | label: { 150 | normal: { 151 | textStyle: { 152 | color: '#d87a80' 153 | } 154 | } 155 | }, 156 | itemStyle: { 157 | normal: { 158 | borderColor: '#eee', 159 | areaColor: '#ddd' 160 | }, 161 | emphasis: { 162 | areaColor: '#fe994e' 163 | } 164 | } 165 | }, 166 | 167 | graph: { 168 | color: colorPalette 169 | }, 170 | 171 | gauge : { 172 | axisLine: { 173 | lineStyle: { 174 | color: [[0.2, '#2ec7c9'],[0.8, '#5ab1ef'],[1, '#d87a80']], 175 | width: 10 176 | } 177 | }, 178 | axisTick: { 179 | splitNumber: 10, 180 | length :15, 181 | lineStyle: { 182 | color: 'auto' 183 | } 184 | }, 185 | splitLine: { 186 | length :22, 187 | lineStyle: { 188 | color: 'auto' 189 | } 190 | }, 191 | pointer : { 192 | width : 5 193 | } 194 | } 195 | }; 196 | 197 | echarts.registerTheme('macarons', theme); 198 | })); -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/js/theme/roma.js: -------------------------------------------------------------------------------- 1 | (function (root, factory) {if (typeof define === 'function' && define.amd) { 2 | // AMD. Register as an anonymous module. 
3 | define(['exports', 'echarts'], factory); 4 | } else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') { 5 | // CommonJS 6 | factory(exports, require('echarts')); 7 | } else { 8 | // Browser globals 9 | factory({}, root.echarts); 10 | } 11 | }(this, function (exports, echarts) { 12 | var log = function (msg) { 13 | if (typeof console !== 'undefined') { 14 | console && console.error && console.error(msg); 15 | } 16 | }; 17 | if (!echarts) { 18 | log('ECharts is not Loaded'); 19 | return; 20 | } 21 | 22 | var colorPalette = ['#E01F54','#001852','#f5e8c8','#b8d2c7','#c6b38e', 23 | '#a4d8c2','#f3d999','#d3758f','#dcc392','#2e4783', 24 | '#82b6e9','#ff6347','#a092f1','#0a915d','#eaf889', 25 | '#6699FF','#ff6666','#3cb371','#d5b158','#38b6b6' 26 | ]; 27 | 28 | var theme = { 29 | color: colorPalette, 30 | 31 | visualMap: { 32 | color:['#e01f54','#e7dbc3'], 33 | textStyle: { 34 | color: '#333' 35 | } 36 | }, 37 | 38 | candlestick: { 39 | itemStyle: { 40 | normal: { 41 | color: '#e01f54', 42 | color0: '#001852', 43 | lineStyle: { 44 | width: 1, 45 | color: '#f5e8c8', 46 | color0: '#b8d2c7' 47 | } 48 | } 49 | } 50 | }, 51 | 52 | graph: { 53 | color: colorPalette 54 | }, 55 | 56 | gauge : { 57 | axisLine: { 58 | lineStyle: { 59 | color: [[0.2, '#E01F54'],[0.8, '#b8d2c7'],[1, '#001852']], 60 | width: 8 61 | } 62 | } 63 | } 64 | }; 65 | 66 | echarts.registerTheme('roma', theme); 67 | })); -------------------------------------------------------------------------------- /data-analysis/src/main/webapp/js/theme/vintage.js: -------------------------------------------------------------------------------- 1 | (function (root, factory) { 2 | if (typeof define === 'function' && define.amd) { 3 | // AMD. Register as an anonymous module. 
4 | define(['exports', 'echarts'], factory); 5 | } else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') { 6 | // CommonJS 7 | factory(exports, require('echarts')); 8 | } else { 9 | // Browser globals 10 | factory({}, root.echarts); 11 | } 12 | }(this, function (exports, echarts) { 13 | var log = function (msg) { 14 | if (typeof console !== 'undefined') { 15 | console && console.error && console.error(msg); 16 | } 17 | }; 18 | if (!echarts) { 19 | log('ECharts is not Loaded'); 20 | return; 21 | } 22 | var colorPalette = ['#d87c7c','#919e8b', '#d7ab82', '#6e7074','#61a0a8','#efa18d', '#787464', '#cc7e63', '#724e58', '#4b565b']; 23 | echarts.registerTheme('vintage', { 24 | color: colorPalette, 25 | backgroundColor: '#fef8ef', 26 | graph: { 27 | color: colorPalette 28 | } 29 | }); 30 | })); -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/AuxiliaryModels/NameValue.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/AuxiliaryModels/NameValue.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/Comment.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/Comment.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/CommentMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/CommentMapper.class 
-------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/CommentWord.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/CommentWord.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/CommentWordMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/CommentWordMapper.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/CommentWordMapper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | comment_word 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 23 | -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/Post.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/Post.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/PostMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/PostMapper.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/TitleWord.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/TitleWord.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/TitleWordMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/TitleWordMapper.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/TitleWordMapper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | title_word 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 23 | -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/User.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/User.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/UserMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/domain/UserMapper.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/domain/UserMapper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | USER 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 
23 | 37 | 38 | 47 | 48 | 56 | 57 | -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/service/CommentService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/service/CommentService.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/service/CommentWordService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/service/CommentWordService.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/service/PostService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/service/PostService.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/service/TitleWordService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/service/TitleWordService.class -------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/service/UserService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/service/UserService.class 
-------------------------------------------------------------------------------- /data-analysis/target/classes/com/crow/web/EchartsController.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/classes/com/crow/web/EchartsController.class -------------------------------------------------------------------------------- /data-analysis/target/classes/db.properties: -------------------------------------------------------------------------------- 1 | jdbc.driver=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/HupuSpider?characterEncoding=utf-8 3 | jdbc.username=root 4 | jdbc.password=wyj 5 | -------------------------------------------------------------------------------- /data-analysis/target/classes/log4j.properties: -------------------------------------------------------------------------------- 1 | # Global logging configuration 2 | # 在开发环境下日志级别设置成DEBUG,在生产环境下才设置为ERROR和INFO 3 | log4j.rootLogger=DEBUG, stdout 4 | # MyBatis logging configuration... 5 | log4j.logger.org.mybatis.example.BlogMapper=TRACE 6 | # Console output... 
7 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 8 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.stdout.layout.ConversionPattern=%5p [%t] - %m%n 10 | -------------------------------------------------------------------------------- /data-analysis/target/classes/mybatis/sqlMapConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 23 | -------------------------------------------------------------------------------- /data-analysis/target/classes/spring/applicationContext-dao.xml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /data-analysis/target/classes/spring/applicationContext-service.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /data-analysis/target/classes/spring/applicationContext-transaction.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 13 | 14 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /data-analysis/target/classes/spring/springmvc.xml: -------------------------------------------------------------------------------- 1 | 15 | 16 | 20 | 21 | 22 | 25 | 26 | 27 | 28 | 29 | 32 | 33 | 36 | 37 | 38 | 43 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 57 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- 
/data-analysis/target/data-analysis/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Built-By: CrowHawk 3 | Created-By: IntelliJ IDEA 4 | Build-Jdk: 1.8.0_66 5 | 6 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/AuxiliaryModels/NameValue.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/AuxiliaryModels/NameValue.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/Comment.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/Comment.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/CommentMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/CommentMapper.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/CommentWord.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/CommentWord.class 
-------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/CommentWordMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/CommentWordMapper.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/CommentWordMapper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | comment_word 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 23 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/Post.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/Post.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/PostMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/PostMapper.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/TitleWord.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/TitleWord.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/TitleWordMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/TitleWordMapper.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/TitleWordMapper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | title_word 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 23 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/User.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/User.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/UserMapper.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/UserMapper.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/domain/UserMapper.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | USER 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 37 | 38 | 47 | 48 | 56 | 57 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/CommentService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/CommentService.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/CommentWordService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/CommentWordService.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/PostService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/PostService.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/TitleWordService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/TitleWordService.class 
-------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/UserService.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/service/UserService.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/com/crow/web/EchartsController.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/classes/com/crow/web/EchartsController.class -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/db.properties: -------------------------------------------------------------------------------- 1 | jdbc.driver=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/HupuSpider?characterEncoding=utf-8 3 | jdbc.username=root 4 | jdbc.password=wyj 5 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/log4j.properties: -------------------------------------------------------------------------------- 1 | # Global logging configuration 2 | # 在开发环境下日志级别设置成DEBUG,在生产环境下才设置为ERROR和INFO 3 | log4j.rootLogger=DEBUG, stdout 4 | # MyBatis logging configuration... 5 | log4j.logger.org.mybatis.example.BlogMapper=TRACE 6 | # Console output... 
7 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 8 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.stdout.layout.ConversionPattern=%5p [%t] - %m%n 10 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/mybatis/sqlMapConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 23 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/spring/applicationContext-dao.xml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/spring/applicationContext-service.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/spring/applicationContext-transaction.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 13 | 14 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/classes/spring/springmvc.xml: -------------------------------------------------------------------------------- 1 | 15 | 16 | 20 | 21 | 22 | 25 | 26 | 27 | 28 | 29 | 32 | 33 | 36 | 37 | 38 | 43 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 57 | 59 | 60 | 61 | 62 | 
63 | 64 | 65 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/jsp/CommentWords.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/24 5 | Time: 上午11:12 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 25 |
26 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/jsp/Gender.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/23 5 | Time: 下午6:19 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 130 | 131 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/jsp/ProvinceAddress.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/22 5 | Time: 下午10:21 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | 9 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 10 | 11 | 12 | ECharts 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 124 | 125 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/jsp/TitleWords.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/23 5 | Time: 下午9:39 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 25 |
26 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/jsp/Titles.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/23 5 | Time: 下午11:00 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 25 |
26 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/jsp/Views.jsp: -------------------------------------------------------------------------------- 1 | <%-- 2 | Created by IntelliJ IDEA. 3 | User: CrowHawk 4 | Date: 17/10/23 5 | Time: 下午12:49 6 | To change this template use File | Settings | File Templates. 7 | --%> 8 | 9 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 10 | 11 | 12 | ECharts 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 95 | 96 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/ansj_seg-5.1.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/ansj_seg-5.1.1.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/aopalliance-1.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/aopalliance-1.0.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/aspectjweaver-1.8.7.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/aspectjweaver-1.8.7.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/classmate-1.1.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/classmate-1.1.0.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/commons-dbcp-1.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/commons-dbcp-1.4.jar 
-------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/commons-fileupload-1.3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/commons-fileupload-1.3.1.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/commons-io-2.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/commons-io-2.2.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/commons-logging-1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/commons-logging-1.2.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/commons-pool-1.5.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/commons-pool-1.5.4.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/hibernate-validator-5.2.4.Final.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/hibernate-validator-5.2.4.Final.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/jackson-annotations-2.4.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/jackson-annotations-2.4.0.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/jackson-core-2.4.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/jackson-core-2.4.3.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/jackson-databind-2.4.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/jackson-databind-2.4.3.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/javax.servlet-api-3.1.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/javax.servlet-api-3.1.0.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/jboss-logging-3.2.1.Final.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/jboss-logging-3.2.1.Final.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/jsp-api-2.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/jsp-api-2.2.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/jstl-1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/jstl-1.2.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/log4j-1.2.17.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/log4j-1.2.17.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/mybatis-3.3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/mybatis-3.3.1.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/mybatis-spring-1.2.4.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/mybatis-spring-1.2.4.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/mysql-connector-java-5.1.38.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/mysql-connector-java-5.1.38.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/nlp-lang-1.7.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/nlp-lang-1.7.2.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/slf4j-api-1.7.18.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/slf4j-api-1.7.18.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-aop-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-aop-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- 
/data-analysis/target/data-analysis/WEB-INF/lib/spring-aspects-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-aspects-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-beans-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-beans-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-context-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-context-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-core-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-core-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-expression-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-expression-4.2.4.RELEASE.jar 
-------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-jdbc-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-jdbc-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-orm-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-orm-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-test-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-test-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-tx-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-tx-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-web-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-web-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/spring-webmvc-4.2.4.RELEASE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/spring-webmvc-4.2.4.RELEASE.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/standard-1.1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/standard-1.1.2.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/lib/validation-api-1.1.0.Final.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshb/MagicToe/5a94b3470dbb5766ac7bfc51542876860c98757c/data-analysis/target/data-analysis/WEB-INF/lib/validation-api-1.1.0.Final.jar -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | Archetype Created Web Application 6 | 7 | 8 | contextConfigLocation 9 | classpath:spring/applicationContext-*.xml 10 | 11 | 12 | 13 | org.springframework.web.context.ContextLoaderListener 14 | 15 | 16 | 17 | 18 | springmvc 19 | org.springframework.web.servlet.DispatcherServlet 20 | 23 | 24 | contextConfigLocation 25 | classpath:spring/springmvc.xml 26 | 27 | 28 | 29 | 30 | springmvc 31 | 
38 | *.action 39 | 40 | 41 | 42 | 43 | springmvc_rest 44 | org.springframework.web.servlet.DispatcherServlet 45 | 46 | 47 | contextConfigLocation 48 | classpath:spring/springmvc.xml 49 | 50 | 51 | 52 | 53 | springmvc_rest 54 | / 55 | 56 | 57 | 58 | 59 | CharacterEncodingFilter 60 | org.springframework.web.filter.CharacterEncodingFilter 61 | 62 | encoding 63 | utf-8 64 | 65 | 66 | 67 | CharacterEncodingFilter 68 | /* 69 | 70 | 71 | 72 | index.html 73 | index.htm 74 | index.jsp 75 | default.html 76 | default.htm 77 | default.jsp 78 | 79 | 80 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/index.jsp: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Hello World!

4 | 5 | 6 | -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/js/theme/dark.js: -------------------------------------------------------------------------------- 1 | (function (root, factory) { 2 | if (typeof define === 'function' && define.amd) { 3 | // AMD. Register as an anonymous module. 4 | define(['exports', 'echarts'], factory); 5 | } else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') { 6 | // CommonJS 7 | factory(exports, require('echarts')); 8 | } else { 9 | // Browser globals 10 | factory({}, root.echarts); 11 | } 12 | }(this, function (exports, echarts) { 13 | var log = function (msg) { 14 | if (typeof console !== 'undefined') { 15 | console && console.error && console.error(msg); 16 | } 17 | }; 18 | if (!echarts) { 19 | log('ECharts is not Loaded'); 20 | return; 21 | } 22 | var contrastColor = '#eee'; 23 | var axisCommon = function () { 24 | return { 25 | axisLine: { 26 | lineStyle: { 27 | color: contrastColor 28 | } 29 | }, 30 | axisTick: { 31 | lineStyle: { 32 | color: contrastColor 33 | } 34 | }, 35 | axisLabel: { 36 | textStyle: { 37 | color: contrastColor 38 | } 39 | }, 40 | splitLine: { 41 | lineStyle: { 42 | type: 'dashed', 43 | color: '#aaa' 44 | } 45 | }, 46 | splitArea: { 47 | areaStyle: { 48 | color: contrastColor 49 | } 50 | } 51 | }; 52 | }; 53 | 54 | var colorPalette = ['#dd6b66','#759aa0','#e69d87','#8dc1a9','#ea7e53','#eedd78','#73a373','#73b9bc','#7289ab', '#91ca8c','#f49f42']; 55 | var theme = { 56 | color: colorPalette, 57 | backgroundColor: '#333', 58 | tooltip: { 59 | axisPointer: { 60 | lineStyle: { 61 | color: contrastColor 62 | }, 63 | crossStyle: { 64 | color: contrastColor 65 | } 66 | } 67 | }, 68 | legend: { 69 | textStyle: { 70 | color: contrastColor 71 | } 72 | }, 73 | textStyle: { 74 | color: contrastColor 75 | }, 76 | title: { 77 | textStyle: { 78 | color: contrastColor 79 | } 80 | }, 81 | toolbox: { 82 | iconStyle: { 83 | normal: 
{ 84 | borderColor: contrastColor 85 | } 86 | } 87 | }, 88 | dataZoom: { 89 | textStyle: { 90 | color: contrastColor 91 | } 92 | }, 93 | timeline: { 94 | lineStyle: { 95 | color: contrastColor 96 | }, 97 | itemStyle: { 98 | normal: { 99 | color: colorPalette[1] 100 | } 101 | }, 102 | label: { 103 | normal: { 104 | textStyle: { 105 | color: contrastColor 106 | } 107 | } 108 | }, 109 | controlStyle: { 110 | normal: { 111 | color: contrastColor, 112 | borderColor: contrastColor 113 | } 114 | } 115 | }, 116 | timeAxis: axisCommon(), 117 | logAxis: axisCommon(), 118 | valueAxis: axisCommon(), 119 | categoryAxis: axisCommon(), 120 | 121 | line: { 122 | symbol: 'circle' 123 | }, 124 | graph: { 125 | color: colorPalette 126 | }, 127 | gauge: { 128 | title: { 129 | textStyle: { 130 | color: contrastColor 131 | } 132 | } 133 | }, 134 | candlestick: { 135 | itemStyle: { 136 | normal: { 137 | color: '#FD1050', 138 | color0: '#0CF49B', 139 | borderColor: '#FD1050', 140 | borderColor0: '#0CF49B' 141 | } 142 | } 143 | } 144 | }; 145 | theme.categoryAxis.splitLine.show = false; 146 | echarts.registerTheme('dark', theme); 147 | })); -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/js/theme/roma.js: -------------------------------------------------------------------------------- 1 | (function (root, factory) {if (typeof define === 'function' && define.amd) { 2 | // AMD. Register as an anonymous module. 
3 | define(['exports', 'echarts'], factory); 4 | } else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') { 5 | // CommonJS 6 | factory(exports, require('echarts')); 7 | } else { 8 | // Browser globals 9 | factory({}, root.echarts); 10 | } 11 | }(this, function (exports, echarts) { 12 | var log = function (msg) { 13 | if (typeof console !== 'undefined') { 14 | console && console.error && console.error(msg); 15 | } 16 | }; 17 | if (!echarts) { 18 | log('ECharts is not Loaded'); 19 | return; 20 | } 21 | 22 | var colorPalette = ['#E01F54','#001852','#f5e8c8','#b8d2c7','#c6b38e', 23 | '#a4d8c2','#f3d999','#d3758f','#dcc392','#2e4783', 24 | '#82b6e9','#ff6347','#a092f1','#0a915d','#eaf889', 25 | '#6699FF','#ff6666','#3cb371','#d5b158','#38b6b6' 26 | ]; 27 | 28 | var theme = { 29 | color: colorPalette, 30 | 31 | visualMap: { 32 | color:['#e01f54','#e7dbc3'], 33 | textStyle: { 34 | color: '#333' 35 | } 36 | }, 37 | 38 | candlestick: { 39 | itemStyle: { 40 | normal: { 41 | color: '#e01f54', 42 | color0: '#001852', 43 | lineStyle: { 44 | width: 1, 45 | color: '#f5e8c8', 46 | color0: '#b8d2c7' 47 | } 48 | } 49 | } 50 | }, 51 | 52 | graph: { 53 | color: colorPalette 54 | }, 55 | 56 | gauge : { 57 | axisLine: { 58 | lineStyle: { 59 | color: [[0.2, '#E01F54'],[0.8, '#b8d2c7'],[1, '#001852']], 60 | width: 8 61 | } 62 | } 63 | } 64 | }; 65 | 66 | echarts.registerTheme('roma', theme); 67 | })); -------------------------------------------------------------------------------- /data-analysis/target/data-analysis/js/theme/vintage.js: -------------------------------------------------------------------------------- 1 | (function (root, factory) { 2 | if (typeof define === 'function' && define.amd) { 3 | // AMD. Register as an anonymous module. 
4 | define(['exports', 'echarts'], factory); 5 | } else if (typeof exports === 'object' && typeof exports.nodeName !== 'string') { 6 | // CommonJS 7 | factory(exports, require('echarts')); 8 | } else { 9 | // Browser globals 10 | factory({}, root.echarts); 11 | } 12 | }(this, function (exports, echarts) { 13 | var log = function (msg) { 14 | if (typeof console !== 'undefined') { 15 | console && console.error && console.error(msg); 16 | } 17 | }; 18 | if (!echarts) { 19 | log('ECharts is not Loaded'); 20 | return; 21 | } 22 | var colorPalette = ['#d87c7c','#919e8b', '#d7ab82', '#6e7074','#61a0a8','#efa18d', '#787464', '#cc7e63', '#724e58', '#4b565b']; 23 | echarts.registerTheme('vintage', { 24 | color: colorPalette, 25 | backgroundColor: '#fef8ef', 26 | graph: { 27 | color: colorPalette 28 | } 29 | }); 30 | })); -------------------------------------------------------------------------------- /hupu-spider/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | 12 | ### IntelliJ IDEA ### 13 | .idea 14 | *.iws 15 | *.iml 16 | *.ipr 17 | 18 | ### NetBeans ### 19 | nbproject/private/ 20 | build/ 21 | nbbuild/ 22 | dist/ 23 | nbdist/ 24 | .nb-gradle/ -------------------------------------------------------------------------------- /hupu-spider/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.crow 7 | hupu-spider 8 | 0.0.1-SNAPSHOT 9 | jar 10 | 11 | hupu-spider 12 | Demo project for Spring Boot 13 | 14 | 15 | org.springframework.boot 16 | spring-boot-starter-parent 17 | 1.5.7.RELEASE 18 | 19 | 20 | 21 | 22 | UTF-8 23 | UTF-8 24 | 1.8 25 | 26 | 27 | 28 | 29 | mvn-repo 30 | http://maven.ansj.org/ 31 | 32 | 33 | 34 | 35 | 36 | org.springframework.boot 37 | spring-boot-starter-data-jpa 38 | 39 | 40 | 
org.springframework.boot 41 | spring-boot-starter-web 42 | 43 | 44 | 45 | org.springframework.boot 46 | spring-boot-starter-test 47 | test 48 | 49 | 50 | 51 | us.codecraft 52 | 0.7.3 53 | webmagic-core 54 | 55 | 56 | us.codecraft 57 | 0.7.3 58 | webmagic-extension 59 | 60 | 61 | org.mybatis.spring.boot 62 | mybatis-spring-boot-starter 63 | 1.3.1 64 | 65 | 66 | mysql 67 | mysql-connector-java 68 | 5.1.21 69 | 70 | 71 | org.springframework.boot 72 | spring-boot-devtools 73 | 74 | 75 | org.ansj 76 | ansj_seg 77 | 5.1.1 78 | 79 | 80 | 81 | 82 | 83 | 84 | org.springframework.boot 85 | spring-boot-maven-plugin 86 | 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /hupu-spider/src/main/java/com/crow/HupuSpiderApplication.java: -------------------------------------------------------------------------------- 1 | package com.crow; 2 | 3 | 4 | import org.springframework.boot.SpringApplication; 5 | import org.springframework.boot.autoconfigure.SpringBootApplication; 6 | 7 | 8 | 9 | //@RestController 10 | @SpringBootApplication 11 | public class HupuSpiderApplication { 12 | 13 | 14 | public static void main(String[] args) { 15 | SpringApplication.run(HupuSpiderApplication.class, args); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /hupu-spider/src/main/java/com/crow/domain/Comment.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | /** 4 | * Created by CrowHawk on 17/10/11. 
/**
 * One reply scraped from a post; the bean that {@code CommentMapper} inserts
 * into the {@code comment} table.
 */
public class Comment {

    /** Row identifier. */
    private int id;

    /** Title stored with the reply (the pipeline copies it from the comment list). */
    private String title;

    /** Name of the user who wrote the reply. */
    private String author;

    /** Body text of the reply. */
    private String content;

    /** Number of "lights" the reply received. */
    private int litNum;

    /** @return the row identifier */
    public int getId() {
        return this.id;
    }

    /** @param id the row identifier */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the title stored with the reply */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title stored with the reply */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the reply author's name */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the reply author's name */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the reply text */
    public String getContent() {
        return this.content;
    }

    /** @param content the reply text */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return how many lights the reply received */
    public int getLitNum() {
        return this.litNum;
    }

    /** @param litNum how many lights the reply received */
    public void setLitNum(int litNum) {
        this.litNum = litNum;
    }
}
/**
 * All comments scraped from one post page, held as three parallel lists.
 *
 * <p>Index {@code i} of {@link #getContentList()}, {@link #getLitNumList()} and
 * {@link #getCommentAuthors()} describes the same comment. The lists are typed
 * as {@code List<String>} because the pipeline reads every element as text
 * (the light count is parsed to an integer only when it is stored).
 */
public class CommentList {

    /** Title of the post the comments were collected from. */
    private String title;

    /** Comment bodies. */
    private List<String> contentList;

    /** Light counts, still in their scraped textual form. */
    private List<String> litNumList;

    /** Comment author names. */
    private List<String> commentAuthors;

    /** @return the title of the post the comments belong to */
    public String getTitle() {
        return title;
    }

    /** @param title the title of the post the comments belong to */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the comment bodies, or {@code null} if never set */
    public List<String> getContentList() {
        return contentList;
    }

    /** @param contentList the comment bodies */
    public void setContentList(List<String> contentList) {
        this.contentList = contentList;
    }

    /** @return the textual light counts, or {@code null} if never set */
    public List<String> getLitNumList() {
        return litNumList;
    }

    /** @param litNumList the textual light counts */
    public void setLitNumList(List<String> litNumList) {
        this.litNumList = litNumList;
    }

    /** @return the comment author names, or {@code null} if never set */
    public List<String> getCommentAuthors() {
        return commentAuthors;
    }

    /** @param commentAuthors the comment author names */
    public void setCommentAuthors(List<String> commentAuthors) {
        this.commentAuthors = commentAuthors;
    }
}
/**
 * Plain bean for one post, inserted into the {@code PostInfo} table by
 * {@code PostInfoMapper}.
 *
 * <p>The class used to carry WebMagic/JPA annotations that are now disabled:
 * {@code @TargetUrl("https://bbs.hupu.com/\\d+.html")},
 * {@code @HelpUrl("https://bbs.hupu.com/bxj")} and
 * {@code @Table(name = "PostInfo")}. The XPath expressions of the disabled
 * {@code @ExtractBy} annotations are kept next to the fields they fed.
 */
public class HupuBxjPostInfo {

    /** Row identifier (formerly {@code @Id @GeneratedValue}). */
    private int id;

    /** Post title; formerly extracted with {@code //div[@class='bbs-hd-h1']/text()}. */
    private String title;

    /**
     * Post author; formerly extracted with
     * {@code //div[@class='floor']/div[@class='floor-show']//@uname}.
     */
    private String author;

    /** Post body text. */
    private String text;

    /** @return the row identifier */
    public int getId() {
        return id;
    }

    /** @param id the row identifier */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the post title */
    public String getTitle() {
        return title;
    }

    /** @param title the post title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the post author */
    public String getAuthor() {
        return author;
    }

    /** @param author the post author */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the post body text */
    public String getText() {
        return text;
    }

    /** @param text the post body text */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * Renders the title and author for logging.
     *
     * <p>The label now names this class; it previously read {@code JobInfo},
     * left over from the code this bean was copied from.
     */
    @Override
    public String toString() {
        return "HupuBxjPostInfo{" +
                "title='" + title + '\'' +
                ", author='" + author + '\'' +
                '}';
    }
}
/**
 * One forum post; the bean that {@code PostMapper} inserts into the
 * {@code post} table.
 */
public class Post {

    /** Row identifier. */
    private int id;

    /** Number of replies the post had when it was scraped. */
    private int replyNum;

    /** Post title. */
    private String title;

    /** Name of the user who opened the post. */
    private String author;

    /** @return the row identifier */
    public int getId() {
        return this.id;
    }

    /** @param id the row identifier */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the reply count */
    public int getReplyNum() {
        return this.replyNum;
    }

    /** @param replyNum the reply count */
    public void setReplyNum(int replyNum) {
        this.replyNum = replyNum;
    }

    /** @return the post title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the post title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the post author's name */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the post author's name */
    public void setAuthor(String author) {
        this.author = author;
    }
}
8 | */ 9 | @Mapper 10 | public interface PostMapper { 11 | @Insert("insert ignore into post (`title`,`author`,`reply_num`) values (#{title},#{author},#{replyNum})") 12 | void insert(Post post); 13 | } 14 | -------------------------------------------------------------------------------- /hupu-spider/src/main/java/com/crow/domain/ProxyIp.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | public class ProxyIp { 4 | private Long id; 5 | private String ip; 6 | private int port; 7 | 8 | public Long getId() { 9 | return id; 10 | } 11 | 12 | public void setId(Long id) { 13 | this.id = id; 14 | } 15 | 16 | public String getIp() { 17 | return ip; 18 | } 19 | 20 | public void setIp(String ip) { 21 | this.ip = ip == null ? null : ip.trim(); 22 | } 23 | 24 | public int getPort() { 25 | return port; 26 | } 27 | 28 | public void setPort(int port) { 29 | this.port = port; 30 | } 31 | } -------------------------------------------------------------------------------- /hupu-spider/src/main/java/com/crow/domain/ProxyIpMapper.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 2 | 3 | import org.apache.ibatis.annotations.Insert; 4 | import org.apache.ibatis.annotations.Mapper; 5 | import org.apache.ibatis.annotations.Select; 6 | 7 | import java.util.List; 8 | 9 | @Mapper 10 | public interface ProxyIpMapper { 11 | 12 | @Insert("insert into ip_pool (`ip`,`port`) values (#{ip},#{port})") 13 | void insert(ProxyIp proxyIp); 14 | 15 | @Select("select * from ip_pool where id = #{id}") 16 | public ProxyIp findProxyIpById(int id); 17 | 18 | @Select("select * from ip_pool") 19 | public List findAllProxies(); 20 | } -------------------------------------------------------------------------------- /hupu-spider/src/main/java/com/crow/domain/TitleWord.java: -------------------------------------------------------------------------------- 1 | package com.crow.domain; 
/**
 * One word produced by segmenting a post title; the bean that
 * {@code TitleWordMapper} inserts into the {@code title_word} table.
 */
public class TitleWord {

    /** The segmented word itself. */
    private String word;

    /** Row identifier. */
    private int id;

    /** @return the segmented word */
    public String getWord() {
        return this.word;
    }

    /** @param word the segmented word */
    public void setWord(String word) {
        this.word = word;
    }

    /** @return the row identifier */
    public int getId() {
        return this.id;
    }

    /** @param id the row identifier */
    public void setId(int id) {
        this.id = id;
    }
}
/**
 * Profile of one forum user; the bean that {@code UserMapper} writes to the
 * {@code user} table.
 */
public class User {

    /** Row identifier. */
    private int id;

    /** User name. */
    private String name;

    /** Gender as scraped from the profile. */
    private String gender;

    /** The team the user supports. */
    private String homeTeam;

    /** Where the user is located. */
    private String address;

    /** Number of visits to the profile. */
    private int views;

    /** @return the row identifier */
    public int getId() {
        return this.id;
    }

    /** @param id the row identifier */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the user name */
    public String getName() {
        return this.name;
    }

    /** @param name the user name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the gender text */
    public String getGender() {
        return this.gender;
    }

    /** @param gender the gender text */
    public void setGender(String gender) {
        this.gender = gender;
    }

    /** @return the supported team */
    public String getHomeTeam() {
        return this.homeTeam;
    }

    /** @param homeTeam the supported team */
    public void setHomeTeam(String homeTeam) {
        this.homeTeam = homeTeam;
    }

    /** @return the user's location */
    public String getAddress() {
        return this.address;
    }

    /** @param address the user's location */
    public void setAddress(String address) {
        this.address = address;
    }

    /** @return the profile visit count */
    public int getViews() {
        return this.views;
    }

    /** @param views the profile visit count */
    public void setViews(int views) {
        this.views = views;
    }
}
/**
 * Checks whether an HTTP proxy actually forwards traffic.
 */
public class IPCheckUtil {

    /** Page requested through the candidate proxy. */
    private static final String PROBE_URL = "http://www.ip138.com";

    /** Connect and read timeout of the probe, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 4000;

    /**
     * Validates a proxy by fetching {@code http://www.ip138.com} through it.
     *
     * <p>The check is best-effort: any failure (missing argument, port outside
     * 0-65535, unreachable proxy, timeout, non-200 answer) yields {@code false}
     * instead of an exception, and the connection is released on every path.
     *
     * @param ip   host name or address of the proxy
     * @param port port of the proxy
     * @return {@code true} only if the probe request answered with HTTP 200
     */
    public static boolean checkValidIP(String ip, Integer port) {
        if (ip == null || port == null) {
            return false;
        }
        HttpURLConnection connection = null;
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
            connection = (HttpURLConnection) new URL(PROBE_URL).openConnection(proxy);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setRequestMethod("GET");
            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception e) {
            // Deliberately broad: an invalid address or any I/O problem simply
            // means "this proxy is not usable".
            return false;
        } finally {
            // connection is still null when the failure happened before
            // openConnection(); the old catch block dereferenced it anyway.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }
}
/**
 * Builds the {@code Proxy-Authorization} value for the forwarding proxy.
 */
public class ProxyGeneratedUtil {

    /**
     * Signs an order number and timestamp and formats the header value.
     *
     * <p>The signature is the upper-case hexadecimal MD5 of
     * {@code orderno=<orderno>,secret=<secret>,timestamp=<timestamp>}.
     * The result is {@code sign=<sign>&orderno=<orderno>&timestamp=<timestamp>}.
     * The last separator was previously garbled to a multiplication sign,
     * which produced a header without a {@code timestamp} parameter.
     *
     * @param orderno   order number issued by the proxy provider
     * @param secret    secret belonging to the order
     * @param timestamp timestamp to sign
     * @return the header value
     */
    public static String authHeader(String orderno, String secret, int timestamp) {
        // Assemble the text to sign.
        String planText = String.format("orderno=%s,secret=%s,timestamp=%d", orderno, secret, timestamp);

        // Compute the signature.
        String sign = md5HexUpper(planText);

        // Assemble the Proxy-Authorization value.
        return String.format("sign=%s&orderno=%s&timestamp=%d", sign, orderno, timestamp);
    }

    /**
     * Upper-case hex MD5 of the UTF-8 bytes of {@code text}.
     *
     * <p>Uses the JDK digest so the class does not depend on commons-codec,
     * which the project's pom does not declare.
     */
    private static String md5HexUpper(String text) {
        byte[] digest;
        try {
            digest = java.security.MessageDigest.getInstance("MD5")
                    .digest(text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        } catch (java.security.NoSuchAlgorithmException e) {
            // Every Java platform is required to provide MD5.
            throw new IllegalStateException("MD5 digest is not available", e);
        }
        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (byte b : digest) {
            hex.append(String.format("%02X", b & 0xff));
        }
        return hex.toString();
    }
}
/**
 * Pool of browser User-Agent strings used to vary request headers.
 */
public class UserAgentUtil {

    /** The User-Agent values {@link #getRandomUserAgent()} chooses from. */
    public static final String[] AGENTS = new String[]{
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.04",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0",
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
            "Mozilla/4.0 (compatible; MSIE 6.0b; Windows NT 5.1)",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
            "Mozilla/5.0 (X11; Linux i686; rv:40.0) Gecko/20100101 Firefox/40.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
            "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
            "Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25",
            "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)",
            "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:15.0) Gecko/20100101 Firefox/15.0.1",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2471.2 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11",
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"};

    /**
     * Picks one entry of {@link #AGENTS} uniformly at random.
     *
     * <p>Uses the calling thread's {@code ThreadLocalRandom} rather than
     * allocating and seeding a new {@code Random} for every request.
     *
     * @return a User-Agent string from {@link #AGENTS}
     */
    public static String getRandomUserAgent() {
        int index = java.util.concurrent.ThreadLocalRandom.current().nextInt(AGENTS.length);
        return AGENTS[index];
    }
}
15 | */ 16 | @RestController 17 | public class StartUpController { 18 | 19 | @Autowired 20 | HupuSpiderPipeline hupuSpiderPipeline; 21 | /* 22 | @Autowired 23 | ProxyIpMapper proxyIpMapper; 24 | */ 25 | @GetMapping("/") 26 | public String index() { 27 | 28 | /* 29 | List proxyList = proxyIpMapper.findAllProxies(); 30 | proxyList = proxyList.subList(0,10); 31 | List proxies = new ArrayList<>(proxyList.size()); 32 | for(ProxyIp proxyIp : proxyList) { 33 | proxies.add(new Proxy(proxyIp.getIp(), proxyIp.getPort())); 34 | } 35 | */ 36 | HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); 37 | //设置动态转发代理,使用定制的ProxyProvider 38 | httpClientDownloader.setProxyProvider(CrowProxyProvider.from(new Proxy("forward.xdaili.cn", 80))); 39 | 40 | Spider.create(new HupuBxjPageProcessor()) 41 | //new PostInfoPageProcessor()) 42 | //.setDownloader(httpClientDownloader) 43 | .addUrl("https://bbs.hupu.com/bxj-1") 44 | //.addUrl("http://blog.sina.com.cn/s/articlelist_1487828712_0_1.html") 45 | .addPipeline(hupuSpiderPipeline) 46 | .thread(4) 47 | .run(); 48 | return "爬虫开启"; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /hupu-spider/src/main/java/com/crow/webmagic/downloader/CrowProxyProvider.java: -------------------------------------------------------------------------------- 1 | package com.crow.webmagic.downloader; 2 | 3 | import com.crow.utils.IPCheckUtil; 4 | import us.codecraft.webmagic.Page; 5 | import us.codecraft.webmagic.Task; 6 | import us.codecraft.webmagic.proxy.Proxy; 7 | import us.codecraft.webmagic.proxy.ProxyProvider; 8 | 9 | import java.util.ArrayList; 10 | import java.util.Collections; 11 | import java.util.List; 12 | import java.util.concurrent.atomic.AtomicInteger; 13 | 14 | /** 15 | * Created by CrowHawk on 17/10/16. 
16 | */ 17 | 18 | /** 19 | * 自定义的ProxyProvider 20 | * 添加了代理有效性校验 21 | */ 22 | 23 | public class CrowProxyProvider implements ProxyProvider{ 24 | private final List proxies; 25 | private final AtomicInteger pointer; 26 | 27 | public CrowProxyProvider(List proxies) { 28 | this(proxies, new AtomicInteger(-1)); 29 | } 30 | 31 | private CrowProxyProvider(List proxies, AtomicInteger pointer) { 32 | this.proxies = proxies; 33 | this.pointer = pointer; 34 | } 35 | 36 | public static CrowProxyProvider from(Proxy... proxies) { 37 | ArrayList proxiesTemp = new ArrayList(proxies.length); 38 | Proxy[] var2 = proxies; 39 | int var3 = proxies.length; 40 | 41 | for(int var4 = 0; var4 < var3; ++var4) { 42 | Proxy proxy = var2[var4]; 43 | if(IPCheckUtil.checkValidIP(proxy.getHost(), proxy.getPort())) { 44 | proxiesTemp.add(proxy); 45 | } 46 | } 47 | 48 | proxiesTemp.trimToSize(); 49 | return new CrowProxyProvider(Collections.unmodifiableList(proxiesTemp)); 50 | } 51 | 52 | public void returnProxy(Proxy proxy, Page page, Task task) { 53 | } 54 | 55 | public Proxy getProxy(Task task) { 56 | return (Proxy)this.proxies.get(this.incrForLoop()); 57 | } 58 | 59 | private int incrForLoop() { 60 | int p = this.pointer.incrementAndGet(); 61 | int size = this.proxies.size(); 62 | if(p < size) { 63 | return p; 64 | } else { 65 | while(!this.pointer.compareAndSet(p, p % size)) { 66 | p = this.pointer.get(); 67 | } 68 | 69 | return p % size; 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /hupu-spider/src/main/java/com/crow/webmagic/pipeline/HupuSpiderPipeline.java: -------------------------------------------------------------------------------- 1 | package com.crow.webmagic.pipeline; 2 | 3 | import com.crow.domain.*; 4 | import org.springframework.beans.factory.annotation.Autowired; 5 | import org.springframework.stereotype.Component; 6 | import us.codecraft.webmagic.ResultItems; 7 | import us.codecraft.webmagic.Task; 8 | import 
us.codecraft.webmagic.pipeline.Pipeline; 9 | 10 | import java.util.Map; 11 | 12 | /** 13 | * Created by CrowHawk on 17/10/6. 14 | */ 15 | @Component("PostInfoPipeline") 16 | public class HupuSpiderPipeline implements Pipeline{ 17 | 18 | 19 | @Autowired 20 | private PostMapper postMapper; 21 | @Autowired 22 | private CommentMapper commentMapper; 23 | @Autowired 24 | private TitleWordMapper titleWordMapper; 25 | @Autowired 26 | private UserMapper userMapper; 27 | 28 | @Override 29 | public void process(ResultItems resultItems, Task task) { 30 | 31 | for(Map.Entry entry : resultItems.getAll().entrySet()) { 32 | 33 | if(entry.getKey().equals("postInfo")) { 34 | Post post = (Post) entry.getValue(); 35 | if(post != null) { 36 | postMapper.insert(post); 37 | } 38 | } 39 | if(entry.getKey().equals("commentInfo")) { 40 | CommentList commentList = (CommentList) entry.getValue(); 41 | for(int i = 0; i < commentList.getContentList().size(); i++) { 42 | Comment comment = new Comment(); 43 | comment.setTitle(commentList.getTitle()); 44 | comment.setContent(commentList.getContentList().get(i).replaceAll("(& nbsp;)", "")); 45 | comment.setLitNum(Integer.parseInt(commentList.getLitNumList().get(i))); 46 | comment.setAuthor(commentList.getCommentAuthors().get(i)); 47 | commentMapper.insert(comment); 48 | } 49 | } 50 | if(entry.getKey().equals("titleWordInfo")) { 51 | String[] strings = (String[])entry.getValue(); 52 | for(String word: strings) { 53 | TitleWord titleWord = new TitleWord(); 54 | titleWord.setWord(word); 55 | titleWordMapper.insert(titleWord); 56 | } 57 | } 58 | if(entry.getKey().equals("userInfo")) { 59 | User user = (User) entry.getValue(); 60 | if(user != null) { 61 | userMapper.insert(user); 62 | } 63 | } 64 | } 65 | 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /hupu-spider/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | spring: 2 | 
-- Schema of the HupuSpider database used by the hupu-spider mappers.
-- Running this script drops and recreates every table below.

DROP TABLE IF EXISTS `comment`;

CREATE TABLE `comment` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `lit_num` int(11) NOT NULL,
  `author` varchar(20) COLLATE utf8_unicode_ci NOT NULL,
  `content` varchar(2000) COLLATE utf8_unicode_ci NOT NULL,
  -- Stores the title of the parent post, so it is as wide as post.title.
  `title` varchar(200) COLLATE utf8_unicode_ci NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

DROP TABLE IF EXISTS `post`;

-- NOTE(review): PostMapper uses INSERT IGNORE, but this table has no unique
-- key besides id, so duplicate posts are not filtered; confirm the intent.
CREATE TABLE `post` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `title` varchar(200) COLLATE utf8_unicode_ci NOT NULL,
  `author` varchar(20) COLLATE utf8_unicode_ci NOT NULL,
  `reply_num` int(11) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

DROP TABLE IF EXISTS `title_word`;

CREATE TABLE `title_word` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `word` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

DROP TABLE IF EXISTS `user`;

CREATE TABLE `user` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(45) COLLATE utf8_unicode_ci NOT NULL,
  `gender` varchar(10) COLLATE utf8_unicode_ci DEFAULT NULL,
  `home_team` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `address` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  -- Written by UserMapper (REPLACE INTO user (..., `views`)); it was missing here.
  `views` int(11) NOT NULL DEFAULT 0,
  PRIMARY KEY (`id`),
  UNIQUE KEY `name_UNIQUE` (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
-------------------------------------------------------------------------------- /hupu-spider/src/test/java/com/crow/HupuspiderApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.crow; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class HupuspiderApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | String word = "欢迎/v"; 15 | if(word.matches("[\\u4e00-\\u9fa5]+/(n|v|a)")) { 16 | System.out.println(word.substring(0,word.length() - 2)); 17 | } 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /ip-spider/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | 12 | ### IntelliJ IDEA ### 13 | .idea 14 | *.iws 15 | *.iml 16 | *.ipr 17 | 18 | ### NetBeans ### 19 | nbproject/private/ 20 | build/ 21 | nbbuild/ 22 | dist/ 23 | nbdist/ 24 | .nb-gradle/ -------------------------------------------------------------------------------- /ip-spider/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.crow 7 | ip-spider 8 | 0.0.1-SNAPSHOT 9 | jar 10 | 11 | ip-spider 12 | Demo project for Spring Boot 13 | 14 | 15 | org.springframework.boot 16 | spring-boot-starter-parent 17 | 1.5.8.RELEASE 18 | 19 | 20 | 21 | 22 | UTF-8 23 | UTF-8 24 | 1.8 25 | 26 | 27 | 28 | 29 | org.springframework.boot 30 | spring-boot-starter-data-jpa 31 | 32 | 33 | org.springframework.boot 34 | spring-boot-starter-web 35 | 36 | 37 | 38 | org.springframework.boot 39 | 
/**
 * One proxy server address; the bean that {@code ProxyIpMapper} inserts into
 * the {@code ip_pool} table.
 */
public class ProxyIp implements Serializable {

    private static final long serialVersionUID = -3699072211264713025L;

    /** Row identifier. */
    private Long id;

    /** Proxy host, stored without surrounding whitespace. */
    private String ip;

    /** Proxy port. */
    private Integer port;

    /** @return the row identifier */
    public Long getId() {
        return this.id;
    }

    /** @param id the row identifier */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the proxy host */
    public String getIp() {
        return this.ip;
    }

    /**
     * Stores the host after trimming it.
     *
     * @param ip the proxy host; {@code null} is kept as {@code null}
     */
    public void setIp(String ip) {
        if (ip == null) {
            this.ip = null;
        } else {
            this.ip = ip.trim();
        }
    }

    /** @return the proxy port */
    public Integer getPort() {
        return this.port;
    }

    /** @param port the proxy port */
    public void setPort(Integer port) {
        this.port = port;
    }
}
/**
 * Supplies browser User-Agent strings so that crawler requests do not all
 * advertise the same client.
 *
 * Created by CrowHawk on 17/10/16.
 */
public class UserAgentUtil {

    /** Pool of User-Agent header values that {@link #getRandomUserAgent()} picks from. */
    public static final String[] AGENTS = new String[]{
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.04",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0",
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
            "Mozilla/4.0 (compatible; MSIE 6.0b; Windows NT 5.1)",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
            "Mozilla/5.0 (X11; Linux i686; rv:40.0) Gecko/20100101 Firefox/40.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
            "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
            "Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25",
            "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)",
            "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:15.0) Gecko/20100101 Firefox/15.0.1",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2471.2 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11",
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"};

    /**
     * Single shared generator. {@link Random} is safe for concurrent use, so there
     * is no need to allocate and seed a fresh instance on every call.
     */
    private static final Random RANDOM = new Random();

    /**
     * Picks one entry of {@link #AGENTS} uniformly at random.
     *
     * @return a User-Agent string taken from {@link #AGENTS}
     */
    public static String getRandomUserAgent() {
        return AGENTS[RANDOM.nextInt(AGENTS.length)];
    }
}
13 | */ 14 | @RestController 15 | public class StartUpController { 16 | 17 | @Autowired 18 | IPSpiderPipeline ipSpiderPipeline; 19 | 20 | @GetMapping("/pool1") 21 | public String index1() { 22 | 23 | Spider.create(new ProxyPoolProcessor1()) 24 | .addUrl("http://www.xicidaili.com/nn") 25 | //.addUrl("http://blog.sina.com.cn/s/articlelist_1487828712_0_1.html") 26 | .addPipeline(ipSpiderPipeline) 27 | .thread(4) 28 | .run(); 29 | return "爬虫开启1"; 30 | } 31 | 32 | @GetMapping("/pool2") 33 | public String index2() { 34 | 35 | Spider.create(new ProxyPoolProcessor2()) 36 | .addUrl("http://www.kuaidaili.com/free/") 37 | //.addUrl("http://blog.sina.com.cn/s/articlelist_1487828712_0_1.html") 38 | .addPipeline(ipSpiderPipeline) 39 | .thread(4) 40 | .run(); 41 | return "爬虫开启2"; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /ip-spider/src/main/java/com/crow/webmagic/pageprocessor/ProxyPoolProcessor1.java: -------------------------------------------------------------------------------- 1 | package com.crow.webmagic.pageprocessor; 2 | 3 | import com.crow.domain.ProxyIp; 4 | import com.crow.utils.UserAgentUtil; 5 | import us.codecraft.webmagic.Page; 6 | import us.codecraft.webmagic.Site; 7 | import us.codecraft.webmagic.processor.PageProcessor; 8 | import us.codecraft.webmagic.selector.Html; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | /** 14 | * Created by CrowHawk on 17/10/16. 
15 | */ 16 | public class ProxyPoolProcessor1 implements PageProcessor{ 17 | 18 | private Site site = Site.me().setTimeOut(6000).setRetryTimes(3) 19 | .setSleepTime(1000) 20 | .setCharset("UTF-8") 21 | .addHeader("Accept-Encoding", "/") 22 | .setUserAgent(UserAgentUtil.getRandomUserAgent()); 23 | 24 | 25 | @Override 26 | public void process(Page page) { 27 | List ipList = page.getHtml().xpath("//table[@id='ip_list']/tbody/tr").all(); 28 | List result = new ArrayList<>(); 29 | 30 | if(ipList != null && ipList.size() > 0){ 31 | ipList.remove(0); //移除表头 32 | for(String tmp : ipList){ 33 | Html html = Html.create(tmp); 34 | ProxyIp proxyIp = new ProxyIp(); 35 | String[] data = html.xpath("//body/text()").toString().trim().split("\\s+"); 36 | 37 | proxyIp.setIp(data[0]); 38 | proxyIp.setPort(Integer.valueOf(data[1])); 39 | result.add(proxyIp); 40 | } 41 | } 42 | page.putField("result", result); 43 | page.addTargetRequest("http://www.xicidaili.com/nn/2"); 44 | page.addTargetRequest("http://www.xicidaili.com/nt/"); 45 | } 46 | 47 | @Override 48 | public Site getSite() { 49 | return site; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /ip-spider/src/main/java/com/crow/webmagic/pageprocessor/ProxyPoolProcessor2.java: -------------------------------------------------------------------------------- 1 | package com.crow.webmagic.pageprocessor; 2 | 3 | import com.crow.domain.ProxyIp; 4 | import com.crow.utils.UserAgentUtil; 5 | import us.codecraft.webmagic.Page; 6 | import us.codecraft.webmagic.Site; 7 | import us.codecraft.webmagic.processor.PageProcessor; 8 | import us.codecraft.webmagic.selector.Html; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | /** 14 | * Created by CrowHawk on 17/10/16. 
15 | */ 16 | public class ProxyPoolProcessor2 implements PageProcessor { 17 | 18 | private Site site = Site.me().setDisableCookieManagement(true) 19 | .setTimeOut(6000).setRetryTimes(3) 20 | .setSleepTime(1000) 21 | .setCharset("UTF-8") 22 | .addHeader("Accept-Encoding", "/") 23 | .setUserAgent(UserAgentUtil.getRandomUserAgent()); 24 | 25 | @Override 26 | public void process(Page page) { 27 | List ipList = page.getHtml().xpath("//table[@class='table table-bordered table-striped']/tbody/tr").all(); 28 | List result = new ArrayList<>(); 29 | 30 | if(ipList != null && ipList.size() > 0){ 31 | for(String tmp : ipList){ 32 | Html html = Html.create(tmp); 33 | ProxyIp proxyIp = new ProxyIp(); 34 | String[] data = html.xpath("//body/text()").toString().trim().split("\\s+"); 35 | 36 | proxyIp.setIp(data[0]); 37 | proxyIp.setPort(Integer.valueOf(data[1])); 38 | 39 | result.add(proxyIp); 40 | } 41 | } 42 | page.putField("result", result); 43 | page.addTargetRequest("http://www.kuaidaili.com/free/inha/2/"); 44 | page.addTargetRequest("http://www.kuaidaili.com/free/intr/1/"); 45 | } 46 | 47 | @Override 48 | public Site getSite() { 49 | return site; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /ip-spider/src/main/java/com/crow/webmagic/pipeline/IPSpiderPipeline.java: -------------------------------------------------------------------------------- 1 | package com.crow.webmagic.pipeline; 2 | 3 | import com.crow.domain.ProxyIp; 4 | import com.crow.domain.ProxyIpMapper; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | /** 15 | * Created by CrowHawk on 17/10/16. 
16 | */ 17 | @Component("IPSpiderPipeline") 18 | public class IPSpiderPipeline implements Pipeline { 19 | 20 | @Autowired 21 | ProxyIpMapper proxyIpMapper; 22 | 23 | @Override 24 | public void process(ResultItems resultItems, Task task) { 25 | for(Map.Entry entry : resultItems.getAll().entrySet()) { 26 | if (entry.getKey().equals("result")) { 27 | List ipList = (List) entry.getValue(); 28 | for(ProxyIp proxyIp: ipList) { 29 | proxyIpMapper.insert(proxyIp); 30 | } 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /ip-spider/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | spring: 2 | datasource: 3 | url: jdbc:mysql://localhost:3306/HupuSpider?characterEncoding=UTF-8 4 | username: root 5 | password: wyj 6 | driver-class-name: com.mysql.jdbc.Driver 7 | 8 | -------------------------------------------------------------------------------- /ip-spider/src/test/java/com/crow/DataprocessingApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.crow; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class DataprocessingApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /ip-spider/src/test/java/com/crow/IpspiderApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.crow; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | 
@RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class IpspiderApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /ip-spider/src/test/java/com/crow/MagictoeApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.crow; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class MagictoeApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | --------------------------------------------------------------------------------