hit : hits) { 252 | Article source = hit.source; 253 | System.out.println("标题:"+source.getTitle()); 254 | System.out.println("内容:"+source.getContent()); 255 | System.out.println("url:"+source.getUrl()); 256 | System.out.println("来源:"+source.getSource()); 257 | System.out.println("作者:"+source.getAuthor()); 258 | } 259 | } 260 | 261 | 262 | private static void createSearch(String queryString) throws Exception { 263 | JestClient jestClient = JestExample.getJestClient(); 264 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); 265 | searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString)); 266 | HighlightBuilder highlightBuilder = new HighlightBuilder(); 267 | highlightBuilder.field("title");//高亮title 268 | highlightBuilder.field("content");//高亮content 269 | highlightBuilder.preTags("").postTags("");//高亮标签 270 | highlightBuilder.fragmentSize(200);//高亮内容长度 271 | searchSourceBuilder.highlight(highlightBuilder); 272 | Search search = new Search.Builder(searchSourceBuilder.toString()) 273 | .addIndex("article") 274 | .build(); 275 | SearchResult result = jestClient.execute(search); 276 | System.out.println("本次查询共查到:"+result.getTotal()+"篇文章!"); 277 | List> hits = result.getHits(Article.class); 278 | for (Hit hit : hits) { 279 | Article source = hit.source; 280 | //获取高亮后的内容 281 | Map> highlight = hit.highlight; 282 | List titlelist = highlight.get("title");//高亮后的title 283 | if(titlelist!=null){ 284 | source.setTitle(titlelist.get(0)); 285 | } 286 | List contentlist = highlight.get("content");//高亮后的content 287 | if(contentlist!=null){ 288 | source.setContent(contentlist.get(0)); 289 | } 290 | System.out.println("标题:"+source.getTitle()); 291 | System.out.println("内容:"+source.getContent()); 292 | System.out.println("url:"+source.getUrl()); 293 | System.out.println("来源:"+source.getSource()); 294 | System.out.println("作者:"+source.getAuthor()); 295 | } 296 | } 297 | 298 | private static void bulkIndex() throws Exception { 299 | JestClient jestClient 
= JestExample.getJestClient(); 300 | 301 | Article article1 = new Article(4,"中国获租巴基斯坦瓜达尔港2000亩土地 为期43年","巴基斯坦(瓜达尔港)港务局表示,将把瓜达尔港2000亩土地,长期租赁给中方,用于建设(瓜达尔港)首个经济特区。分析指,瓜港首个经济特区的建立,不但能对巴基斯坦的经济发展模式,产生示范作用,还能进一步提振经济水平。" + 302 | "据了解,瓜达尔港务局于今年6月完成了1500亩土地的征收工作,另外500亩的征收工作也将很快完成,所征土地主要来自巴基斯坦海军和俾路支省政府紧邻规划的集装箱货堆区,该经济特区相关基础设施建设预计耗资3500万美元。瓜港务局表示,目前已将这2000亩地租赁给中国,中方将享有43年的租赁权。巴基斯坦前驻华大使马苏德·汗表示,这对提振巴基斯坦经济大有助益:“我认为巴基斯坦所能获得的最大益处主要还是在于经济互通领域”。" + 303 | "为了鼓励国内外投资者到经济特区投资,巴政府特别针对能源、税制等国内投资短板,专门为投资者出台了利好政策来鼓励投资。这些举措包括三个方面,一是保障能源,即保障经济特区的电力和天然气供应方面享有优先权;二是减免税收,即为经济特区投资的企业提供为期10年的税收假期,并为企业有关生产设备的进口给予免关税待遇;三是一站式服务,即为有意投资经济特区的投资者提供一站式快捷服务,包括向投资者提供有关优惠政策的详尽资讯。马苏德·汗还指出,为了让巴基斯坦从投资中受益,巴政府尽可能提供了各种优惠政策:“(由于)巴基斯坦想从中获益,为此做了大量力所能及的工作”。" + 304 | "巴政府高度重视瓜港经济特区建设,将其视为现代化港口的“标配”,期待其能够最大化利用瓜港深水良港的自然禀赋,吸引国内外投资者建立生产、组装以及加工企业。为鼓励投资,巴方还开出了20年免税的优惠条件。" + 305 | "除了瓜达尔港,中巴还有哪些项目?" + 306 | "根据中国和巴基斯坦两国4月20日发表的联合声明,双方将积极推进喀喇昆仑公路升级改造二期(塔科特至哈维连段)、瓜达尔港东湾快速路、新国际机场、卡拉奇至拉合尔高速公路(木尔坦至苏库尔段)、拉合尔轨道交通橙线、海尔-鲁巴经济区、中巴跨境光缆、在巴实行地面数字电视传输标准等重点合作项目及一批基础设施和能源电力项目。" + 307 | "应巴基斯坦总统侯赛因和总理谢里夫邀请,中国国家主席习近平于4月20日至21日对巴基斯坦进行国事访问。访问期间,习近平主席会见了侯赛因总统、谢里夫总理以及巴基斯坦议会、军队和政党领导人,同巴各界人士进行了广泛接触。" + 308 | "双方高度评价将中巴经济走廊打造成丝绸之路经济带和21世纪海上丝绸之路倡议重大项目所取得的进展。巴方欢迎中方设立丝路基金并将该基金用于中巴经济走廊相关项目。" + 309 | "巴方将坚定支持并积极参与“一带一路”建设。丝路基金宣布入股三峡南亚公司,与长江三峡集团等机构联合开发巴基斯坦卡洛特水电站等清洁能源项目,这是丝路基金成立后的首个投资项目。丝路基金愿积极扩展中巴经济走廊框架下的其他项目投融资机会,为“一带一路”建设发挥助推作用。" + 310 | "双方认为,“一带一路”倡议是区域合作和南南合作的新模式,将为实现亚洲整体振兴和各国共同繁荣带来新机遇。" + 311 | "双方对中巴经济走廊建设取得的进展表示满意,强调走廊规划发展将覆盖巴全国各地区,造福巴全体人民,促进中巴两国及本地区各国共同发展繁荣。" + 312 | "双方同意,以中巴经济走廊为引领,以瓜达尔港、能源、交通基础设施和产业合作为重点,形成“1+4”经济合作布局。双方欢迎中巴经济走廊联委会第四次会议成功举行,同意尽快完成《中巴经济走廊远景规划》。", "http://news.163.com/15/0909/14/B332O90E0001124J.html", Calendar.getInstance().getTime(), "中国青年网", "匿名"); 313 | Article article2 = new Article(5,"中央党校举行秋季学期开学典礼 刘云山出席讲话","新华网北京9月7日电 中共中央党校7日上午举行2015年秋季学期开学典礼。中共中央政治局常委、中央党校校长刘云山出席并讲话,就深入学习贯彻习近平总书记系列重要讲话精神、坚持党校姓党提出要求。" + 314 | 
"刘云山指出,党校姓党是党校工作的根本原则。坚持党校姓党,重要的是坚持坚定正确的政治方向、贯彻实事求是的思想路线、落实从严治校的基本方针。要把党的基本理论教育和党性党风教育作为主课,深化中国特色社会主义理论体系学习教育,深化对习近平总书记系列重要讲话精神的学习教育,深化党章和党纪党规的学习教育。要坚持实事求是的思想方法和工作方法,弘扬理论联系实际的学风,提高教学和科研工作的针对性实效性。要严明制度、严肃纪律,把从严治校要求体现到党校工作和学员管理各方面,使党校成为不正之风的“净化器”。" + 315 | "刘云山指出,坚持党校姓党,既是对党校教职工的要求,也是对党校学员的要求。每一位学员都要强化党的意识,保持对党忠诚的政治品格,忠诚于党的信仰,坚定道路自信、理论自信、制度自信;忠诚于党的宗旨,牢记为了人民是天职、服务人民是本职,自觉践行党的群众路线;忠诚于党的事业,勤政敬业、先之劳之、敢于担当,保持干事创业的进取心和精气神。要强化党的纪律规矩意识,经常看一看党的政治准则、组织原则执行得怎么样,看一看党的路线方针政策落实得怎么样,看一看重大事项请示报告制度贯彻得怎么样,找差距、明不足,做政治上的明白人、遵规守纪的老实人。" + 316 | "刘云山强调,领导干部来党校学习,就要自觉接受党的优良作风的洗礼,修好作风建设这门大课。要重温党的光荣传统,学习革命先辈、英雄模范和优秀典型的先进事迹和崇高风范,自觉践行社会主义核心价值观,永葆共产党人的先进性纯洁性,以人格力量传递作风建设正能量。要认真落实党中央关于从严治党、改进作风的一系列要求,贯彻从严精神、突出问题导向,把自己摆进去、把职责摆进去,推动思想问题和实际问题一起解决,履行好党和人民赋予的职责使命。" + 317 | "赵乐际出席开学典礼。" + 318 | "中央有关部门负责同志,中央党校校委成员、全体学员和教职工在主会场参加开学典礼。中国浦东、井冈山、延安干部学院全体学员和教职工在分会场参加开学典礼。", "http://news.163.com/15/0907/20/B2UGF9860001124J.html", Calendar.getInstance().getTime(), "新华网", "NN053"); 319 | Article article3 = new Article(6,"俞正声率中央代表团赴大昭寺看望宗教界人士","国际在线报道:7号上午,赴西藏出席自治区50周年庆祝活动的中共中央政治局常委、全国政协主席俞正声与中央代表团主要成员赴大昭寺慰问宗教界爱国人士。俞正声向大昭寺赠送了习近平总书记题写的“加强民族团结,建设美丽西藏”贺幛,珐琅彩平安瓶,并向僧人发放布施,他在会见僧众时表示,希望藏传佛教坚持爱国爱教传统。" + 320 | "至今已有1300多年历史的大昭寺,在藏传佛教中拥有至高无上的地位。西藏的寺院归属于某一藏传佛教教派,大昭寺则是各教派共尊的神圣寺院。一年四季不论雨雪风霜,大昭寺外都有从四面八方赶来磕长头拜谒的虔诚信众。" + 321 | "7号上午,赴西藏出席自治区50周年庆祝活动的中共中央政治局常委、全国政协主席俞正声与中央代表团主要成员专门到大昭寺看望僧众,“我代表党中央、国务院和习主席向大家问好。藏传佛教有许多爱国爱教的传统,有许多高僧大德维护祖国统一和民族团结,坚持传播爱国爱教的正信。党和政府对此一贯给予充分肯定,并对藏传佛教的发展给予了支持。”" + 322 | "俞正声回忆这已是他自1995年以来第三次来大昭寺。他表示,过去二十年发生了巨大的变化,而藏传佛教的发展与祖国的发展、西藏的发展息息相关。藏传佛教要更好发展必须与社会主义社会相适应。他也向僧人们提出期望:“佛教既是信仰,也是一种文化和学问,希望大家不断提高自己对佛教的认识和理解,提高自己的水平。希望大家更好地管理好大昭寺,搞好民主管理、科学管理,使我们的管理更加规范。”" + 323 | "今天是中央代表团抵达拉萨的第二天,当天安排有多项与自治区成立五十周年相关活动。继上午接见自治区领导成员、离退休老同志、各族各界群众代表宗教界爱国人士,参观自治区50年成就展外,中央代表团下午还慰问了解放军驻拉萨部队、武警总队等。当天晚些时候,庆祝西藏自治区成立50周年招待会、文艺晚会将在拉萨举行。(来源:环球资讯)", "http://news.163.com/15/0907/16/B2U3O30R00014JB5.html", Calendar.getInstance().getTime(), "国际在线", "全宇虹"); 324 | Article article4 = new Article(7,"张德江:发挥人大主导作用 加快完备法律规范体系","新华网广州9月7日电 
中共中央政治局常委、全国人大常委会委员长张德江9月6日至7日在广东出席第21次全国地方立法研讨会,并在佛山市就地方人大工作进行调研。他强调,要全面贯彻落实党的十八大和十八届三中、四中全会精神,深入学习贯彻习近平总书记系列重要讲话精神,认真实施立法法,充分发挥人大及其常委会在立法工作中的主导作用,抓住提高立法质量这个关键,加快形成完备的法律规范体系,为协调推进“四个全面”战略布局提供法治保障。" + 325 | "张德江指出,立法权是人大及其常委会的重要职权。做好新形势下立法工作,要坚持党的领导,贯彻党的路线方针政策和中央重大决策部署,切实增强思想自觉和行动自觉。要牢固树立依法立法、为民立法、科学立法理念,尊重改革发展客观规律和法治建设内在规律,加强重点领域立法,做到立法主动适应改革和经济社会发展需要。充分发挥在立法工作中的主导作用,把握立项、起草、审议等关键环节,科学确定立法项目,协调做好立法工作,研究解决立法中的重点难点问题,建立健全立法工作格局,形成立法工作合力。" + 326 | "张德江说,地方性法规是我国法律体系的重要组成部分。地方立法关键是在本地特色上下功夫、在有效管用上做文章。当前要落实好立法法的规定,扎实推进赋予设区的市地方立法权工作,明确步骤和时间,做好各项准备工作,标准不能降低,底线不能突破,坚持“成熟一个、确定一个”,确保立法质量。" + 327 | "张德江强调,加强和改进立法工作,要有高素质的立法工作队伍作为保障。要把思想政治建设摆在首位,全面提升专业素质能力,充实力量,培养人才,努力造就一支忠于党、忠于国家、忠于人民、忠于法律的立法工作队伍。" + 328 | "张德江一直非常关注地方人大特别是基层人大工作。在粤期间,他来到佛山市人大常委会,详细询问立法、监督等工作情况,希望他们与时俱进、开拓创新,切实担负起宪法法律赋予的职责。他走进南海区人大常委会、顺德区乐从镇人大主席团,同基层人大代表和人大工作者亲切交谈,肯定基层人大代表联系群众的有益做法,强调人大代表不能脱离人民群众,必须把人民利益放在心中,时刻为群众着想,听取群众意见,反映群众意愿,帮助群众解决实际问题。张德江指出,县乡人大在基层治理体系和治理能力建设中具有重要作用。要贯彻落实中央关于加强县乡人大工作和建设的精神,认真实施新修改的地方组织法、选举法、代表法,不断提高基层人大工作水平,推动人大工作迈上新台阶。" + 329 | "中共中央政治局委员、广东省委书记胡春华参加上述活动。", "http://news.163.com/15/0907/20/B2UGEUTJ00014PRF.html", Calendar.getInstance().getTime(), "新华网", "陈菲"); 330 | 331 | Bulk bulk = new Bulk.Builder() 332 | .defaultIndex("article") 333 | .defaultType("article") 334 | .addAction(Arrays.asList( 335 | new Index.Builder(article1).build(), 336 | new Index.Builder(article2).build(), 337 | new Index.Builder(article3).build(), 338 | new Index.Builder(article4).build() 339 | )).build(); 340 | jestClient.execute(bulk); 341 | } 342 | 343 | 344 | private static void createIndex() throws Exception { 345 | JestClient jestClient = JestExample.getJestClient(); 346 | Article article1 = new Article(1,"高圆圆身上淤青 遭家暴还是玩SM遭性虐?","近日,有媒体拍到高圆圆身上的淤青,腿上还有两块伤疤,引起不少人猜测是遭受家暴。" + 347 | "对于遭到家暴的传闻,高圆圆首次作出澄清,称这是因为照顾母亲而留下的伤痕,她跟赵又廷关系好得很。" + 348 | "照顾母亲竟然会留下伤痕?究竟是怎么照顾的。" + 349 | "高圆圆称,“我妈当时住院,她翻身是需要旁人帮助的,我要到床头去,抱起她的上臂,然后她的脚一蹬,这样才能翻过来。" + 350 | "但我们两个的力气都不够,每次一用力的时候,我的大腿就会刚好撞在那个床框上,所以大腿上就撞出那两块淤青了。" + 351 | 
"事情真的这么简单吗?即使稍微一撞,也不至于淤青吧!" + 352 | "看到那个伤疤以及淤青的皮肤,不得不让人怀疑高圆圆是遭受家暴。" + 353 | "当然,还有另外一个原因,就是玩SM遭性虐。" + 354 | "当然,这么变态的事情,相信女神不会做的。" + 355 | "是照顾母亲留下的伤痕也好,遭受家暴也好,希望女神高圆圆以后都能平平安安健健康康吧!", "http://www.vdfly.com/star/20141119/37968.html", Calendar.getInstance().getTime(), "青春娱乐网", "匿名"); 356 | Article article2 = new Article(2,"习近平:希望新西兰保障输华产品质量安全","新华网惠灵顿11月20日电(记者陈贽 田帆刘华)国家主席习近平20日在惠灵顿同新西兰总理约翰·基举行会谈,双方决定,将中新关系提升为全面战略伙伴关系,共建中新两国利益共同体,推动两国合作不断迈上新台阶。" + 357 | "习近平指出,中新两国相互理解、相互包容、平等相待,建立了高水平政治互信,开展了宽领域互利合作,两国人民都支持发展中新关系。" + 358 | "两国决定建立全面战略伙伴关系,为中新关系发展指明了大方向。" + 359 | "习近平强调,两国要保持高层交往,构筑多层次多渠道交流合作格局。" + 360 | "中新自由贸易协定是中国同发达国家之间达成的第一个自由贸易协定。双方要采取积极举措,推动两国经贸关系不断迈上新台阶,力争早日实现2020年双边贸易额达到300亿新元的新目标。" + 361 | "双方要继续巩固和提升农牧业等传统领域合作。中国有13亿多人口,市场大得很。新西兰乳制品、羊毛、牛羊肉、海产品等优质产品在中国很受欢迎。" + 362 | "希望新方切实保障输华产品质量安全,保障中国消费者权益。金融服务、信息技术、节能环保、生物医药等是中国重点发展的产业,新西兰有竞争力,双方可以开展更多合作。" + 363 | "习近平强调,前不久,中国同其他各方一道,推动亚太经合组织第二十二次领导人非正式会议启动了亚太自由贸易区进程。中新都是亚太经合组织成员,也都是区域全面经济伙伴关系协定谈判方,双方可以在这些机制中加强协调合作,通过打造惠及各方的地区自由贸易安排、建设好亚洲基础设施投资银行,推进亚太经济一体化进程。" + 364 | "南太平洋地区也是中方提出的21世纪海上丝绸之路的自然延伸,我们欢迎新方参与进来,使中新经贸合作取得更大发展。习近平强调,中新两国要加强人文交流,增进相互了解和友谊,中方将在新西兰设立中国文化中心。" + 365 | "中新签署电视合拍协议是中国政府同外国政府签署的首个电视合拍协议。双方要加强防务、执法交流,在打击腐败、追逃追赃等方面开展合作。" + 366 | "中方愿意同新方在南极、太平洋岛国事务上加强合作。约翰·基表示,建交42年来,新中关系取得长足进展,合作不断深化和扩大,彼此成为好伙伴。" + 367 | "新方致力于同中方一道建设好两国全面战略伙伴关系,在中方核心利益和重大关切问题上,新方将继续支持中方。新中签署自由贸易协定6年来,两国合作取得丰硕成果,新形势下,要提高双边贸易投资水平。" + 368 | "新方希望扩大新西兰农产品、乳制品对华出口,欢迎中国企业前来投资。新方重视亚洲基础设施投资银行的作用,将积极参与银行建设。新方将简化签证手续,欢迎更多中国公民前来旅游、留学。" + 369 | "中方积极节能减排,为国际社会合作应对气候变化树立了典范,新方希望同中方加强合作。我祝贺中方成功举办了亚太经合组织领导人非正式会议,愿意同中方一道,推动区域一体化进程,促进亚太地区和平与繁荣。" + 370 | "会谈后,双方发表《中国和新西兰关于建立全面战略伙伴关系的联合声明》。习近平和约翰·基共同见证了多项双边合作文件的签署,涉及气候变化、电视、教育、南极、金融、旅游、食品安全等领域。两国领导人还共同会见记者。" + 371 | "习近平强调,中新关系具有开创性、示范性意义。中新建立了全面战略伙伴关系,为两国关系规划了宏伟蓝图。中新两国签署一系列合作协议,充分展示了两国务实合作的广度和深度。" + 372 | "中国人说:“兄弟同心,其利断金。”毛利族谚语说:“你我篮子在一起,大家生活更美好。”让我们携手合作,谱写中新关系发展新篇章,更好造福两国人民。约翰·基表示,新西兰钦佩中国的非凡成就,相信中国的发展不仅造福中国人民,也给包括新西兰在内的各国带来巨大机遇。" + 373 | "我们愿同中方共同推动两国全面战略伙伴关系发展。王沪宁、栗战书、杨洁篪等参加上述活动。", 
"http://news.163.com/14/1120/15/ABGM1POL00014JB5.html", Calendar.getInstance().getTime(), "新华网", "NN053"); 374 | Article article3 = new Article(3,"周末陕西将迎雨雪天气 关中近日大部雾霾缭绕","本报讯(首席记者 姬娜)前天,我省47个县城出现雾霾。昨天,关中大部分地区依旧雾霾缭绕,能见度差。预计22日-24日有雨雪天气,届时雾霾或将能得到缓解。对于这样的天气,微信上不再如之前有那么多埋怨牢骚,更多的是无奈。" + 375 | "刘先生说:“现在有雾霾是正常的,天气晴好反倒是不正常的,我已经习惯了。”网友“舟子”调侃称:“与其埋怨世界不如改变自己,多多欣赏一下这霾天气,那是多么美妙的一种朦胧感,让我想起了初恋。”" + 376 | "省气象台专家称,昨天,最低气温略有回升,陕北大部在-4-4℃,关中大部在0-8℃,陕南大部4-9℃,全省最低温出现在吴起, -3.8℃。有了雾霾和云层的包 围,西安温度在6-17℃之间。" + 377 | "预计本月下旬全省以阴天、多云为主,22-24日有降水天气。今天:陕北、关中多云间晴天,陕南多云转阴天;21日:全省多云;22日:陕北多云间阴天,关中、陕南多云转阴天,部分地方有小雨;23日:全省阴天,关中部分、陕南部分地方有小雨;24日:全省阴天,陕北部分地方有雨夹雪,关中、陕南有小雨。西安今天晴转多云,3-15℃;明天多云,后天阴天。据省气象局消息:昨天我省晴间多云,无明显冷空气活动,早晨大部分地方出现轻雾,11时后泾河、周至、长安、户县、渭南、蒲城、韩城、华县、咸阳、彬县、武功等地出现霾。" + 378 | "受雾霾影响,关中地区除宝鸡外,西安、咸阳、渭南、铜川空气质量为中度污染。今晨0℃气温线主要位于延安南部,07时气温具体分布:陕北大部-4~2℃(最低吴起-4.1℃),关中大部、商洛2~6℃(西安泾河5.8℃),汉中、安康5~10℃。渭南市气象台昨天08时发布霾黄色预警信号:预计未来24小时内渭南大部地区将出现中度霾,局地有重度霾,易形成中度到重度空气污染。今明两天大部地方多云 22-24日有降水过程今天白天:陕北、关中多云间晴天,陕南多云间阴天。早晨关中、陕南部分地方有雾或霾。今天晚上:陕北、关中多云间晴天,陕南多云间阴天。" + 379 | "21日:陕北、关中多云,陕南多云转阴天,南部局地有小雨。22日:陕北、关中多云转阴天,陕南阴天,关中南部和陕南大部有小雨。23日:全省阴天,关中部分有小雨,陕南有小到中雨。24日:全省阴天,陕北部分地方有雨夹雪,关中、陕南有小雨。25日:全省阴天转多云。26日:全省多云。后期天气趋势(27-29日):我省以阴到多云为主,中南部仍有阴雨天气。", "http://xian.qq.com/a/20141120/011300.htm", Calendar.getInstance().getTime(), "三秦都市报 - 三秦网", "姬娜"); 380 | Index index1 = new Index.Builder(article1).index("article").type("article").build(); 381 | Index index2 = new Index.Builder(article2).index("article").type("article").build(); 382 | Index index3 = new Index.Builder(article3).index("article").type("article").build(); 383 | JestResult jestResult1 = jestClient.execute(index1); 384 | System.out.println(jestResult1.getJsonString()); 385 | JestResult jestResult2 = jestClient.execute(index2); 386 | System.out.println(jestResult2.getJsonString()); 387 | JestResult jestResult3 = jestClient.execute(index3); 388 | System.out.println(jestResult3.getJsonString()); 389 | } 390 | 391 | 392 | private static JestClient getJestClient() { 393 | 
JestClientFactory factory = new JestClientFactory(); 394 | factory.setHttpClientConfig(new HttpClientConfig 395 | .Builder("http://127.0.0.1:9200") 396 | .gson(new GsonBuilder().setDateFormat("yyyy-MM-dd'T'HH:mm:ss").create()) 397 | .multiThreaded(true) 398 | .readTimeout(10000) 399 | .build()); 400 | JestClient client = factory.getObject(); 401 | return client; 402 | } 403 | 404 | } -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/controller/ArticleController.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.controller; 2 | 3 | import net.aimeizi.model.Article; 4 | import net.aimeizi.service.ArticleService; 5 | import org.apache.commons.lang3.StringUtils; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.stereotype.Controller; 8 | import org.springframework.web.bind.annotation.RequestMapping; 9 | import org.springframework.web.bind.annotation.RequestMethod; 10 | import org.springframework.web.servlet.ModelAndView; 11 | 12 | import javax.servlet.http.HttpServletRequest; 13 | import javax.servlet.http.HttpServletResponse; 14 | import java.util.List; 15 | import java.util.Map; 16 | 17 | @Controller 18 | public class ArticleController { 19 | 20 | @Autowired 21 | // @Resource 22 | private ArticleService articleService; 23 | 24 | @RequestMapping(value = "/", method = RequestMethod.GET) 25 | public String tosearch() { 26 | return "search"; 27 | } 28 | 29 | @RequestMapping(value = "/search", method = RequestMethod.POST) 30 | public ModelAndView search(HttpServletRequest request, HttpServletResponse response) { 31 | ModelAndView modelAndView = new ModelAndView(); 32 | modelAndView.setViewName("search"); 33 | String field = request.getParameter("field"); 34 | String queryString = request.getParameter("queryString"); 35 | String older = request.getParameter("older"); 36 | String pageNumber = 
request.getParameter("pageNumber"); 37 | String pageSize = request.getParameter("pageSize"); 38 | if (StringUtils.isEmpty(queryString)) { 39 | return modelAndView; 40 | } 41 | try { 42 | if (StringUtils.isEmpty(pageNumber) || StringUtils.isEmpty(pageSize)) { 43 | pageNumber = String.valueOf("1"); 44 | pageSize = String.valueOf("10"); 45 | } 46 | Map maps = articleService.search(field, queryString, older, Integer.parseInt(pageNumber), Integer.parseInt(pageSize)); 47 | modelAndView.addObject("queryString", queryString); 48 | modelAndView.addObject("articles", (List) maps.get("articles")); 49 | long count = (Long) maps.get("count"); 50 | modelAndView.addObject("count", count); 51 | modelAndView.addObject("took", (Long) maps.get("took")); 52 | modelAndView.addObject("field", field); 53 | modelAndView.addObject("older", older); 54 | modelAndView.addObject("pageNumber", pageNumber); 55 | modelAndView.addObject("pageSize", pageSize); 56 | modelAndView.addObject("totalPages", count % Integer.parseInt(pageSize) == 0 ? count / Integer.parseInt(pageSize) : count / Integer.parseInt(pageSize) + 1); 57 | } catch (Exception e) { 58 | e.printStackTrace(); 59 | } 60 | return modelAndView; 61 | } 62 | 63 | } -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/dao/ArticleDao.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.dao; 2 | 3 | import net.aimeizi.model.Article; 4 | 5 | /** 6 | * Created by Administrator on 2015/9/10. 
7 | */ 8 | public interface ArticleDao { 9 | void save(Article article); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/dao/impl/ArticleDaoImpl.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.dao.impl; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.jdbc.core.JdbcTemplate; 7 | import org.springframework.jdbc.core.PreparedStatementSetter; 8 | import org.springframework.stereotype.Repository; 9 | 10 | import javax.sql.DataSource; 11 | import java.sql.Date; 12 | import java.sql.PreparedStatement; 13 | import java.sql.SQLException; 14 | 15 | /** 16 | * Created by Administrator on 2015/9/10. 17 | */ 18 | @Repository 19 | public class ArticleDaoImpl implements ArticleDao { 20 | 21 | private JdbcTemplate jdbcTemplate; 22 | 23 | @Autowired 24 | public void setDataSource(DataSource dataSource) { 25 | this.jdbcTemplate = new JdbcTemplate(dataSource); 26 | } 27 | 28 | public void save(final Article article) { 29 | String sql = "INSERT INTO news (title,content,url,source,author,pubdate) VALUES (?,?,?,?,?,?)"; 30 | jdbcTemplate.update(sql, new PreparedStatementSetter() { 31 | public void setValues(PreparedStatement ps) throws SQLException { 32 | ps.setString(1,article.getTitle()); 33 | ps.setString(2,article.getContent()); 34 | ps.setString(3,article.getUrl()); 35 | ps.setString(4,article.getSource()); 36 | ps.setString(5,article.getAuthor()); 37 | ps.setDate(6, new Date(article.getPubdate().getTime())); 38 | } 39 | }); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/model/Article.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.model; 2 | 3 | import 
io.searchbox.annotations.JestId; 4 | 5 | import java.util.Date; 6 | 7 | public class Article { 8 | 9 | @JestId 10 | private int id; 11 | private String title; 12 | private String content; 13 | private String url; 14 | private Date pubdate; 15 | private String source; 16 | private String author; 17 | 18 | public Article() { 19 | } 20 | 21 | public Article(int id, String title, String content, String url, 22 | Date pubdate, String source, String author) { 23 | super(); 24 | this.id = id; 25 | this.title = title; 26 | this.content = content; 27 | this.url = url; 28 | this.pubdate = pubdate; 29 | this.source = source; 30 | this.author = author; 31 | } 32 | 33 | public int getId() { 34 | return id; 35 | } 36 | 37 | public void setId(int id) { 38 | this.id = id; 39 | } 40 | 41 | public String getTitle() { 42 | return title; 43 | } 44 | 45 | public void setTitle(String title) { 46 | this.title = title; 47 | } 48 | 49 | public String getContent() { 50 | return content; 51 | } 52 | 53 | public void setContent(String content) { 54 | this.content = content; 55 | } 56 | 57 | public String getUrl() { 58 | return url; 59 | } 60 | 61 | public void setUrl(String url) { 62 | this.url = url; 63 | } 64 | 65 | public Date getPubdate() { 66 | return pubdate; 67 | } 68 | 69 | public void setPubdate(Date pubdate) { 70 | this.pubdate = pubdate; 71 | } 72 | 73 | public String getSource() { 74 | return source; 75 | } 76 | 77 | public void setSource(String source) { 78 | this.source = source; 79 | } 80 | 81 | public String getAuthor() { 82 | return author; 83 | } 84 | 85 | public void setAuthor(String author) { 86 | this.author = author; 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/service/ArticleService.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.service; 2 | 3 | import net.aimeizi.model.Article; 4 | 5 | import java.util.List; 6 | import 
/**
 * Search operations over the indexed articles.
 * Created by Administrator on 2015/9/21.
 */
public interface ArticleService {

    /**
     * Runs a full-text search driven by {@code field} and {@code queryString}
     * and returns one page of results.
     *
     * <p>As implemented by {@code ArticleServiceImpl}, the returned map holds
     * {@code "articles"} (the hits of the requested page), {@code "count"} (total
     * number of matches, a {@code Long}) and {@code "took"} (server-side search
     * time, a {@code Long}).
     *
     * @param field       the field to search, or {@code "all"} to search across fields
     * @param queryString the text to search for
     * @param older       requested sort direction
     * @param pageNumber  1-based page index
     * @param pageSize    number of hits per page
     * @return the result page and its metadata, keyed by name
     * @throws Exception if the search cannot be executed
     */
    Map<String, Object> search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception;

}
27 | */ 28 | @Service 29 | public class ArticleServiceImpl implements ArticleService { 30 | 31 | 32 | private static JestClient getJestClient() { 33 | JestClientFactory factory = new JestClientFactory(); 34 | factory.setHttpClientConfig(new HttpClientConfig 35 | .Builder("http://127.0.0.1:9200") 36 | .gson(new GsonBuilder().setDateFormat("yyyy-MM-dd'T'HH:mm:ss").create()) 37 | .multiThreaded(true) 38 | .readTimeout(10000) 39 | .build()); 40 | JestClient client = factory.getObject(); 41 | return client; 42 | } 43 | 44 | /** 45 | * 检索 46 | * 47 | * @param field 48 | * @param queryString 49 | * @param older 50 | * @param pageNumber 51 | * @param pageSize 52 | * @return 53 | * @throws Exception 54 | */ 55 | public Map search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception { 56 | List articles = new ArrayList(); 57 | JestClient jestClient = getJestClient(); 58 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); 59 | // 构建查询 60 | if ("all".equals(field)) { 61 | searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString)); 62 | } else { 63 | searchSourceBuilder.query(QueryBuilders.termQuery(field, queryString)); 64 | // 设置排序 65 | // searchSourceBuilder.sort(field, "asc".equals(older) ? 
SortOrder.ASC : SortOrder.DESC); // 设置排序字段及排序顺序 66 | } 67 | // 设置高亮字段 68 | HighlightBuilder highlightBuilder = new HighlightBuilder(); 69 | highlightBuilder.field("title");//高亮title 70 | highlightBuilder.field("content");//高亮content 71 | highlightBuilder.field("author");//高亮author 72 | highlightBuilder.field("source");//高亮source 73 | highlightBuilder.preTags("").postTags("");//高亮标签 74 | highlightBuilder.fragmentSize(200);//高亮内容长度 75 | searchSourceBuilder.highlight(highlightBuilder); 76 | 77 | // 设置分页 78 | searchSourceBuilder.from((pageNumber - 1) * pageSize);//设置起始页 79 | searchSourceBuilder.size(pageSize);//设置页大小 80 | Search search = new Search.Builder(searchSourceBuilder.toString()) 81 | .addIndex("news")// 索引名称 82 | .build(); 83 | SearchResult result = jestClient.execute(search); 84 | 85 | // 手动解析 86 | // parseSearchResult(articles, result); 87 | 88 | // 自动解析 89 | JsonObject jsonObject = result.getJsonObject(); 90 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 91 | long took = jsonObject.get("took").getAsLong(); 92 | long total = hitsobject.get("total").getAsLong(); 93 | List> hits = result.getHits(Article.class); 94 | for (SearchResult.Hit hit : hits) { 95 | Article source = hit.source; 96 | //获取高亮后的内容 97 | Map> highlight = hit.highlight; 98 | List titlelist = highlight.get("title");//高亮后的title 99 | if (titlelist != null) { 100 | source.setTitle(titlelist.get(0)); 101 | } 102 | List contentlist = highlight.get("content");//高亮后的content 103 | if (contentlist != null) { 104 | source.setContent(contentlist.get(0)); 105 | } 106 | List authorlist = highlight.get("author");//高亮后的author 107 | if (authorlist != null) { 108 | source.setAuthor(authorlist.get(0)); 109 | } 110 | List sourcelist = highlight.get("source");//高亮后的source 111 | if (sourcelist != null) { 112 | source.setSource(sourcelist.get(0)); 113 | } 114 | Article article = new Article(); 115 | article.setId(source.getId()); 116 | article.setTitle(source.getTitle()); 117 | 
article.setContent(source.getContent()); 118 | article.setSource(source.getSource()); 119 | article.setAuthor(source.getAuthor()); 120 | article.setUrl(source.getUrl()); 121 | article.setPubdate(source.getPubdate()); 122 | articles.add(article); 123 | } 124 | Map maps = new HashMap(); 125 | maps.put("articles", articles); 126 | maps.put("count", total); 127 | maps.put("took", took); 128 | return maps; 129 | } 130 | 131 | /** 132 | * 手动解析查询结果 133 | * @param articles 134 | * @param result 135 | * @throws ParseException 136 | */ 137 | private void parseSearchResult(List articles, SearchResult result) throws ParseException { 138 | JsonObject jsonObject = result.getJsonObject(); 139 | long took = jsonObject.get("took").getAsLong(); 140 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 141 | long total = hitsobject.get("total").getAsLong(); 142 | JsonArray jsonArray = hitsobject.getAsJsonArray("hits"); 143 | 144 | for (int i = 0; i < jsonArray.size(); i++) { 145 | JsonObject jsonHitsObject = jsonArray.get(i).getAsJsonObject(); 146 | 147 | // 获取返回字段 148 | JsonObject sourceObject = jsonHitsObject.get("_source").getAsJsonObject(); 149 | 150 | // 封装Article对象 151 | Article article = new Article(); 152 | article.setTitle(sourceObject.get("title").getAsString()); 153 | article.setContent(sourceObject.get("content").getAsString()); 154 | article.setSource(sourceObject.get("source").getAsString()); 155 | article.setAuthor(sourceObject.get("author").getAsString()); 156 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); 157 | article.setPubdate(sdf.parse(sourceObject.get("pubdate").getAsString())); 158 | article.setUrl(sourceObject.get("url").getAsString()); 159 | 160 | // 获取高亮字段 161 | JsonObject highlightObject = jsonHitsObject.get("highlight").getAsJsonObject(); 162 | String content = null; 163 | String title = null; 164 | String source = null; 165 | String author = null; 166 | if (highlightObject.get("content") != null) { 167 | content = 
highlightObject.get("content").getAsJsonArray().get(0).getAsString(); 168 | } 169 | if (highlightObject.get("title") != null) { 170 | title = highlightObject.get("title").getAsJsonArray().get(0).getAsString(); 171 | } 172 | if (highlightObject.get("source") != null) { 173 | source = highlightObject.get("source").getAsJsonArray().get(0).getAsString(); 174 | } 175 | if (highlightObject.get("author") != null) { 176 | author = highlightObject.get("author").getAsJsonArray().get(0).getAsString(); 177 | } 178 | if (content != null) { 179 | article.setContent(content); 180 | } 181 | if (title != null) { 182 | article.setTitle(title); 183 | } 184 | if (source != null) { 185 | article.setSource(source); 186 | } 187 | if (author != null) { 188 | article.setAuthor(author); 189 | } 190 | articles.add(article); 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/GovNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * 爬取中央政府网要闻、热点、部门新闻、地方报道、执法监管等新闻信息 13 | * Created by Administrator on 2015/9/10. 
14 | */ 15 | public class GovNewsPageProcesser implements PageProcessor { 16 | 17 | // 新闻列表 18 | private final static String URL_LIST = "http://new\\.sousuo\\.gov\\.cn\\/column\\/\\d+\\/\\d+\\.htm"; 19 | 20 | // 新闻列表页正文url 21 | private final static String URL_POST = "http://www\\.gov\\.cn/[\\w/-]+\\.htm"; 22 | 23 | private Site site = Site.me() 24 | .setRetryTimes(3) 25 | .setSleepTime(1000) 26 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"); 27 | 28 | public void process(Page page) { 29 | // 列表页 30 | if(page.getUrl().regex(URL_LIST).match()){ 31 | // 添加详情页请求链接 32 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 33 | // 添加列表页请求链接 34 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 35 | }else{// 详情页 36 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//div[@class='pages-title']").toString())); 37 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/table[@id='printContent']/tbody/tr/td").toString())); 38 | page.putField("source",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']/span[@class='font'][2]").toString().replace("来源: ", ""))); 39 | page.putField("author",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/div[@class='editor']").toString().replace("责任编辑: ", ""))); 40 | page.putField("create",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']").toString())); 41 | page.putField("url",page.getUrl().get()); 42 | 43 | String title = (String)page.getResultItems().get("title"); 44 | String content = (String)page.getResultItems().get("content"); 45 | String create = (String)page.getResultItems().get("create"); 46 | String source = (String)page.getResultItems().get("source"); 47 | String url = 
(String)page.getResultItems().get("url"); 48 | String author = (String)page.getResultItems().get("author"); 49 | 50 | // 创建article 51 | Article article = Utils.createArticle(title, content, source, author, url, create); 52 | 53 | // 索引 54 | 55 | Utils.index(article); 56 | 57 | } 58 | } 59 | 60 | public Site getSite() { 61 | return site; 62 | } 63 | 64 | /** 65 | * html字符过滤 66 | * @param str 67 | * @return 68 | */ 69 | private static String replaceHTML(String str){ 70 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 71 | } 72 | 73 | public static void main(String[] args) { 74 | 75 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 76 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 77 | Spider.create(new GovNewsPageProcesser()) 78 | .addUrl("http://new.sousuo.gov.cn/column/19769/0.htm") //要闻 79 | .addUrl("http://new.sousuo.gov.cn/column/16704/0.htm") //热点 80 | .addUrl("http://new.sousuo.gov.cn/column/16700/0.htm") //部门新闻 81 | .addUrl("http://new.sousuo.gov.cn/column/16699/0.htm") //地方报道 82 | .addUrl("http://new.sousuo.gov.cn/column/16697/0.htm") //执法监管 83 | .addUrl("http://new.sousuo.gov.cn/column/19423/0.htm") //国务院信息 84 | .addUrl("http://new.sousuo.gov.cn/column/16622/0.htm") //讲话 85 | .addUrl("http://new.sousuo.gov.cn/column/16623/0.htm") //会议 86 | .addUrl("http://new.sousuo.gov.cn/column/16621/0.htm") //活动 87 | .addUrl("http://new.sousuo.gov.cn/column/16620/0.htm") //出访 88 | .addUrl("http://new.sousuo.gov.cn/column/16740/0.htm") //专题信息-最新 89 | .addUrl("http://new.sousuo.gov.cn/column/16739/0.htm") //专题信息-聚焦 90 | .addUrl("http://new.sousuo.gov.cn/column/16743/0.htm") //事件 91 | .addUrl("http://new.sousuo.gov.cn/column/16744/0.htm") //预案 92 | .addUrl("http://new.sousuo.gov.cn/column/16742/0.htm") //工作 93 | .addUrl("http://new.sousuo.gov.cn/column/16765/0.htm") //政策法规解读-专家 94 | .addUrl("http://new.sousuo.gov.cn/column/16764/0.htm") //政策法规解读-媒体 95 | 
.addUrl("http://new.sousuo.gov.cn/column/17999/0.htm") //评论-要论 96 | .addUrl("http://new.sousuo.gov.cn/column/18000/0.htm") //评论-时评 97 | .addUrl("http://new.sousuo.gov.cn/column/18001/0.htm") //评论-网评 98 | .addUrl("http://new.sousuo.gov.cn/column/16852/0.htm") //数据要闻 99 | .addPipeline(jdbcPipeline) // 将抓取到的结果保存到数据库 100 | .thread(5) 101 | .run(); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/JdbcPipeline.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import java.text.ParseException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | /** 19 | * Created by Administrator on 2015/9/10. 
20 | */ 21 | @Component("jdbcPipeline") 22 | public class JdbcPipeline implements Pipeline { 23 | 24 | @Autowired 25 | ArticleDao articleDao; 26 | 27 | public void process(ResultItems resultItems, Task task) { 28 | Map items = resultItems.getAll(); 29 | if(resultItems!=null&&resultItems.getAll().size()>0){ 30 | Article article = new Article(); 31 | article.setTitle((String) items.get("title")); 32 | article.setContent((String) items.get("content")); 33 | article.setSource((String) items.get("source")); 34 | article.setAuthor((String) items.get("author")); 35 | article.setUrl((String)items.get("url")); 36 | String dataStr = (String)items.get("create"); 37 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 38 | Matcher matcher = pattern.matcher(dataStr); 39 | if(matcher.find()){ 40 | dataStr = matcher.group(0); 41 | } 42 | try { 43 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(dataStr)); 44 | } catch (ParseException e) { 45 | e.printStackTrace(); 46 | } 47 | articleDao.save(article); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/NeteaseNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/9 0009. 
13 | */ 14 | public class NeteaseNewsPageProcesser implements PageProcessor { 15 | 16 | private Site site = Site.me().setDomain("news.163.com") 17 | .setRetryTimes(3) 18 | .setSleepTime(1000) 19 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36"); 20 | 21 | public static final String URL_LIST = "http://news\\.163\\.com/special/\\w+/\\w+\\.html"; 22 | 23 | public static final String URL_POST = "http://news\\.163\\.com/.+\\.html"; 24 | 25 | public void process(Page page) { 26 | //列表页 27 | if (page.getUrl().regex(URL_LIST).match()||page.getUrl().regex("http://news\\.163\\.com/domestic").match()||page.getUrl().regex("http://news\\.163\\.com/shehui").match()) { 28 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 29 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 30 | }else{ 31 | 32 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//h1[@id='h1title']").toString())); 33 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@id='endText']").toString())); 34 | page.putField("create", Utils.replaceHTML(page.getHtml().xpath("//div[@class=\"ep-time-soure cDGray\"]").toString())); 35 | page.putField("source", Utils.replaceHTML(page.getHtml().xpath("//a[@id=\"ne_article_source\"]/text()").toString())); 36 | page.putField("url", page.getUrl().get()); 37 | 38 | String title = (String)page.getResultItems().get("title"); 39 | String content = (String)page.getResultItems().get("content"); 40 | String create = (String)page.getResultItems().get("create"); 41 | String source = (String)page.getResultItems().get("source"); 42 | String url = (String)page.getResultItems().get("url"); 43 | String author = ""; 44 | 45 | // 创建article 46 | Article article = Utils.createArticle(title, content, source, author, url, create); 47 | 48 | // 索引 49 | 50 | Utils.index(article); 51 | 52 | } 53 | } 54 | 55 | public Site getSite() { 56 | return 
site; 57 | } 58 | 59 | 60 | 61 | public static void main(String[] args) { 62 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 63 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 64 | Spider.create(new NeteaseNewsPageProcesser()) 65 | .addUrl("http://news.163.com/domestic") 66 | .addUrl("http://news.163.com/shehui") 67 | .addPipeline(jdbcPipeline) 68 | .thread(5) 69 | .run(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/Utils.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import net.aimeizi.client.elasticsearch.TransportClient; 5 | import net.aimeizi.model.Article; 6 | 7 | import java.text.SimpleDateFormat; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/11. 
13 | */ 14 | public class Utils { 15 | 16 | /** 17 | * 创建Article 18 | * @return 19 | */ 20 | public static Article createArticle(String title,String content,String source,String author,String url,String create){ 21 | Article article = new Article(); 22 | article.setTitle(title); 23 | article.setContent(content); 24 | article.setSource(source); 25 | article.setAuthor(author); 26 | try { 27 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 28 | Matcher matcher = pattern.matcher(create); 29 | if(matcher.find()){ 30 | create = matcher.group(0); 31 | } 32 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(create)); 33 | }catch (Exception e){ 34 | } 35 | article.setUrl(url); 36 | return article; 37 | } 38 | 39 | /** 40 | * 创建索引 41 | * @param article 42 | */ 43 | public static void index(Article article){ 44 | // 创建索引 45 | ObjectMapper mapper = new ObjectMapper(); 46 | try { 47 | TransportClient.createIndex("news", "article", mapper.writeValueAsString(article)); 48 | }catch (Exception e){ 49 | } 50 | } 51 | 52 | /** 53 | * html字符过滤 54 | * @param str 55 | * @return 56 | */ 57 | public static String replaceHTML(String str){ 58 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/resources/applicationContext.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/resources/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: elasticsearch -------------------------------------------------------------------------------- /src/main/resources/jdbc.properties: -------------------------------------------------------------------------------- 1 | 
jdbc.driverClassName=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/news 3 | jdbc.username=root 4 | jdbc.password=root -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=ERROR, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %c{1}:%L - %m%n 9 | 10 | # NodeChecker gets too chatty during test phase 11 | # log4j.category.io.searchbox.client.config.discovery.NodeChecker=FATAL 12 | 13 | # Same as above, gets too chatty for regular builds 14 | log4j.category.org.elasticsearch=WARN -------------------------------------------------------------------------------- /src/main/resources/sql.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `news` DEFAULT CHARACTER SET utf8; 2 | 3 | USE `news`; 4 | 5 | DROP TABLE IF EXISTS `news`; 6 | 7 | CREATE TABLE `news` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `title` varchar(100) DEFAULT NULL, 10 | `content` mediumtext, 11 | `url` varchar(100) DEFAULT NULL, 12 | `source` varchar(50) DEFAULT NULL, 13 | `author` varchar(50) DEFAULT NULL, 14 | `pubdate` datetime DEFAULT NULL, 15 | PRIMARY KEY (`id`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=112 DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/mvc-dispatcher-servlet.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
-------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/pages/search.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="utf-8" %> 2 | <%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt" %> 3 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> 4 | 5 | 6 | 7 | 8 | elasticsearch-jest 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 53 | 54 | 55 | 59 | 60 | 62 | 63 | 64 | 66 | 67 | 68 | 70 | 71 | 72 | 74 | 75 | 76 | 78 | 79 | 80 | 82 | 83 | 84 | 93 | 95 | 106 | 117 | 121 | 125 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | selected>全部 148 | selected>标题 149 | selected>内容 150 | selected>来源 151 | selected>作者 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | selected>升序 160 | selected>降序 161 | 162 | 163 | 164 | 165 | selected>10 166 | selected>20 167 | selected>50 168 | selected>100 169 | 170 | 171 | 172 | 搜索 173 | 174 | 175 | 176 | 177 | 178 | 搜索关键字${queryString}共检索到${count}条记录,共${totalPages}页,每页显示${pageSize}条。共耗时${took}毫秒。 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 没有要查询的内容 187 | 188 | 189 | 190 | ${article.title} 191 | ${article.content} 192 | 来源:${article.source} 编辑:${article.author} 发表日期: 193 | 194 | 195 | 196 | 197 | 198 | 199 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | elasticsearch-jest-example 7 | 8 | contextConfigLocation 9 | 10 | /WEB-INF/mvc-dispatcher-servlet.xml 11 | 12 | 13 | 14 | 15 | org.springframework.web.context.ContextLoaderListener 16 | 17 | 18 | CharacterEncodingFilter 19 | org.springframework.web.filter.CharacterEncodingFilter 20 | 21 | 
encoding 22 | UTF-8 23 | 24 | 25 | 26 | CharacterEncodingFilter 27 | /* 28 | 29 | 30 | 31 | mvc-dispatcher 32 | org.springframework.web.servlet.DispatcherServlet 33 | 34 | contextConfigLocation 35 | /WEB-INF/mvc-dispatcher-servlet.xml 36 | 37 | 1 38 | 39 | 40 | 41 | mvc-dispatcher 42 | / 43 | 44 | -------------------------------------------------------------------------------- /src/main/webapp/css/pager.css: -------------------------------------------------------------------------------- 1 | #pageNav{padding-left:386px;} 2 | #pageNav ul.pages { 3 | display:block; 4 | border:none; 5 | text-transform:uppercase; 6 | font-size:12px; 7 | margin:10px 0 50px; 8 | padding:0; 9 | } 10 | #pageNav ul.pages li { 11 | list-style:none; 12 | float:left; 13 | border:1px solid #9AAFE5; 14 | text-decoration:none; 15 | margin:0 5px 0 0; 16 | padding:5px; 17 | } 18 | #pageNav ul.pages li:hover { 19 | border:1px solid #003f7e; 20 | } 21 | #pageNav ul.pages li.pgEmpty { 22 | 23 | } 24 | #pageNav ul.pages li.pgCurrent { 25 | border:none; 26 | /*border:1px solid #003f7e;*/ 27 | color:#000; 28 | font-weight:700; 29 | /*background-color:#eee;*/ 30 | background: none repeat scroll 0 0 #2E6AB1; 31 | border: 1px solid #2E6AB1; 32 | color: #FFFFFF; 33 | font-weight: bold; 34 | } -------------------------------------------------------------------------------- /src/main/webapp/images/top.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v5tech/elasticsearch-jest-example/041008b96f219e5b9bad4d8a429faa44aa862cab/src/main/webapp/images/top.png -------------------------------------------------------------------------------- /src/main/webapp/js/jquery.pager.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery pager plugin 3 | * Version 1.0 (12/22/2008) 4 | * @requires jQuery v1.2.6 or later 5 | * 6 | * Example at: http://jonpauldavies.github.com/JQuery/Pager/PagerDemo.html 7 | * 8 
| * Copyright (c) 2008-2009 Jon Paul Davies 9 | * Dual licensed under the MIT and GPL licenses: 10 | * http://www.opensource.org/licenses/mit-license.php 11 | * http://www.gnu.org/licenses/gpl.html 12 | * 13 | * Read the related blog post and contact the author at http://www.j-dee.com/2008/12/22/jquery-pager-plugin/ 14 | * 15 | * This version is far from perfect and doesn't manage its own state, therefore contributions are more than welcome! 16 | * 17 | * Usage: .pager({ pagenumber: 1, pagecount: 15, buttonClickCallback: PagerClickTest }); 18 | * 19 | * Where pagenumber is the visible page number 20 | * pagecount is the total number of pages to display 21 | * buttonClickCallback is the method to fire when a pager button is clicked. 22 | * 23 | * buttonClickCallback signature is PagerClickTest = function(pageclickednumber) 24 | * Where pageclickednumber is the number of the page clicked in the control. 25 | * 26 | * The included Pager.CSS file is a dependency but can obviously be tweaked to your wishes 27 | * Tested in IE6 IE7 Firefox & Safari. Any browser strangeness, please report.
// ---- src/main/webapp/js/jquery.pager.js ----
(function($) {

    /**
     * Renders a pager into every matched element.
     *
     * options.pagenumber          page currently shown (defaults to 1)
     * options.pagecount           total number of pages (defaults to 1)
     * options.buttonClickCallback function called with the destination page when a
     *                             button is clicked
     */
    $.fn.pager = function(options) {

        // Merge with the defaults and read from the merged object so that omitted
        // options really fall back to $.fn.pager.defaults.
        var opts = $.extend({}, $.fn.pager.defaults, options);
        var pagenumber = parseInt(opts.pagenumber, 10);
        var pagecount = parseInt(opts.pagecount, 10);

        return this.each(function() {
            var $target = $(this);

            // empty out the destination element and then render out the pager
            $target.empty().append(renderpager(pagenumber, pagecount, opts.buttonClickCallback));

            // pointer cursor while hovering a button; only this pager's buttons are bound
            $target.find('.pages li')
                .mouseover(function() { document.body.style.cursor = "pointer"; })
                .mouseout(function() { document.body.style.cursor = "auto"; });
        });
    };

    // render and return the pager with the supplied options
    function renderpager(pagenumber, pagecount, buttonClickCallback) {

        // list element that holds every button; styled through "ul.pages" in pager.css
        var $pager = $('<ul class="pages"></ul>');

        // first / previous buttons
        $pager.append(renderButton('首页', pagenumber, pagecount, buttonClickCallback))
              .append(renderButton('上一页', pagenumber, pagecount, buttonClickCallback));

        // at most 9 page numbers are shown, centred on the current page where possible
        var startPoint = 1;
        var endPoint = 9;

        if (pagenumber > 4) {
            startPoint = pagenumber - 4;
            endPoint = pagenumber + 4;
        }

        if (endPoint > pagecount) {
            startPoint = pagecount - 8;
            endPoint = pagecount;
        }

        if (startPoint < 1) {
            startPoint = 1;
        }

        // loop through the visible pages and render one button each
        for (var page = startPoint; page <= endPoint; page++) {

            var currentButton = $('<li>' + page + '</li>');

            if (page == pagenumber) {
                currentButton.addClass('pgCurrent');
            } else {
                // the page number is read back from the button's own text node
                currentButton.click(function() { buttonClickCallback(this.firstChild.data); });
            }
            currentButton.appendTo($pager);
        }

        // next / last buttons
        $pager.append(renderButton('下一页', pagenumber, pagecount, buttonClickCallback))
              .append(renderButton('末页', pagenumber, pagecount, buttonClickCallback));

        return $pager;
    }

    // renders and returns a first / previous / next / last button
    function renderButton(buttonLabel, pagenumber, pagecount, buttonClickCallback) {

        var $Button = $('<li>' + buttonLabel + '</li>');

        var destPage = 1;

        // work out the destination page for the requested button type
        switch (buttonLabel) {
            case "首页":
                destPage = 1;
                break;
            case "上一页":
                destPage = pagenumber - 1;
                break;
            case "下一页":
                destPage = pagenumber + 1;
                break;
            case "末页":
                destPage = pagecount;
                break;
        }

        // backward buttons are disabled on the first page, forward buttons on the last
        var goesBack = (buttonLabel == "首页" || buttonLabel == "上一页");
        var disabled = goesBack ? pagenumber <= 1 : pagenumber >= pagecount;

        if (disabled) {
            $Button.addClass('pgEmpty');
        } else {
            $Button.click(function() { buttonClickCallback(destPage); });
        }

        return $Button;
    }

    // pager defaults
    $.fn.pager.defaults = {
        pagenumber: 1,
        pagecount: 1
    };

})(jQuery);

// ---- src/main/webapp/js/top.js ----
/**
 * Scrolls the window back towards the top-left corner in decelerating steps.
 *
 * acceleration  each step divides the current offset by (1 + acceleration); defaults to 0.1
 * time          delay between steps in milliseconds; defaults to 16
 */
function goTop(acceleration, time)
{
    acceleration = acceleration || 0.1;
    time = time || 16;

    var x1 = 0;
    var y1 = 0;
    var x2 = 0;
    var y2 = 0;

    if (document.documentElement)
    {
        x1 = document.documentElement.scrollLeft || 0;
        y1 = document.documentElement.scrollTop || 0;
    }
    if (document.body)
    {
        x2 = document.body.scrollLeft || 0;
        y2 = document.body.scrollTop || 0;
    }
    var x3 = window.scrollX || 0;
    var y3 = window.scrollY || 0;

    // the scroll offset is read from three places; the largest value is used
    var x = Math.max(x1, Math.max(x2, x3));
    var y = Math.max(y1, Math.max(y2, y3));

    var speed = 1 + acceleration;
    window.scrollTo(Math.floor(x / speed), Math.floor(y / speed));

    if (x > 0 || y > 0)
    {
        // schedule the next step with a function rather than a string of code
        window.setTimeout(function() { goTop(acceleration, time); }, time);
    }
}
hit : hits) { 279 | Article source = hit.source; 280 | //获取高亮后的内容 281 | Map> highlight = hit.highlight; 282 | List titlelist = highlight.get("title");//高亮后的title 283 | if(titlelist!=null){ 284 | source.setTitle(titlelist.get(0)); 285 | } 286 | List contentlist = highlight.get("content");//高亮后的content 287 | if(contentlist!=null){ 288 | source.setContent(contentlist.get(0)); 289 | } 290 | System.out.println("标题:"+source.getTitle()); 291 | System.out.println("内容:"+source.getContent()); 292 | System.out.println("url:"+source.getUrl()); 293 | System.out.println("来源:"+source.getSource()); 294 | System.out.println("作者:"+source.getAuthor()); 295 | } 296 | } 297 | 298 | private static void bulkIndex() throws Exception { 299 | JestClient jestClient = JestExample.getJestClient(); 300 | 301 | Article article1 = new Article(4,"中国获租巴基斯坦瓜达尔港2000亩土地 为期43年","巴基斯坦(瓜达尔港)港务局表示,将把瓜达尔港2000亩土地,长期租赁给中方,用于建设(瓜达尔港)首个经济特区。分析指,瓜港首个经济特区的建立,不但能对巴基斯坦的经济发展模式,产生示范作用,还能进一步提振经济水平。" + 302 | "据了解,瓜达尔港务局于今年6月完成了1500亩土地的征收工作,另外500亩的征收工作也将很快完成,所征土地主要来自巴基斯坦海军和俾路支省政府紧邻规划的集装箱货堆区,该经济特区相关基础设施建设预计耗资3500万美元。瓜港务局表示,目前已将这2000亩地租赁给中国,中方将享有43年的租赁权。巴基斯坦前驻华大使马苏德·汗表示,这对提振巴基斯坦经济大有助益:“我认为巴基斯坦所能获得的最大益处主要还是在于经济互通领域”。" + 303 | "为了鼓励国内外投资者到经济特区投资,巴政府特别针对能源、税制等国内投资短板,专门为投资者出台了利好政策来鼓励投资。这些举措包括三个方面,一是保障能源,即保障经济特区的电力和天然气供应方面享有优先权;二是减免税收,即为经济特区投资的企业提供为期10年的税收假期,并为企业有关生产设备的进口给予免关税待遇;三是一站式服务,即为有意投资经济特区的投资者提供一站式快捷服务,包括向投资者提供有关优惠政策的详尽资讯。马苏德·汗还指出,为了让巴基斯坦从投资中受益,巴政府尽可能提供了各种优惠政策:“(由于)巴基斯坦想从中获益,为此做了大量力所能及的工作”。" + 304 | "巴政府高度重视瓜港经济特区建设,将其视为现代化港口的“标配”,期待其能够最大化利用瓜港深水良港的自然禀赋,吸引国内外投资者建立生产、组装以及加工企业。为鼓励投资,巴方还开出了20年免税的优惠条件。" + 305 | "除了瓜达尔港,中巴还有哪些项目?" 
+ 306 | "根据中国和巴基斯坦两国4月20日发表的联合声明,双方将积极推进喀喇昆仑公路升级改造二期(塔科特至哈维连段)、瓜达尔港东湾快速路、新国际机场、卡拉奇至拉合尔高速公路(木尔坦至苏库尔段)、拉合尔轨道交通橙线、海尔-鲁巴经济区、中巴跨境光缆、在巴实行地面数字电视传输标准等重点合作项目及一批基础设施和能源电力项目。" + 307 | "应巴基斯坦总统侯赛因和总理谢里夫邀请,中国国家主席习近平于4月20日至21日对巴基斯坦进行国事访问。访问期间,习近平主席会见了侯赛因总统、谢里夫总理以及巴基斯坦议会、军队和政党领导人,同巴各界人士进行了广泛接触。" + 308 | "双方高度评价将中巴经济走廊打造成丝绸之路经济带和21世纪海上丝绸之路倡议重大项目所取得的进展。巴方欢迎中方设立丝路基金并将该基金用于中巴经济走廊相关项目。" + 309 | "巴方将坚定支持并积极参与“一带一路”建设。丝路基金宣布入股三峡南亚公司,与长江三峡集团等机构联合开发巴基斯坦卡洛特水电站等清洁能源项目,这是丝路基金成立后的首个投资项目。丝路基金愿积极扩展中巴经济走廊框架下的其他项目投融资机会,为“一带一路”建设发挥助推作用。" + 310 | "双方认为,“一带一路”倡议是区域合作和南南合作的新模式,将为实现亚洲整体振兴和各国共同繁荣带来新机遇。" + 311 | "双方对中巴经济走廊建设取得的进展表示满意,强调走廊规划发展将覆盖巴全国各地区,造福巴全体人民,促进中巴两国及本地区各国共同发展繁荣。" + 312 | "双方同意,以中巴经济走廊为引领,以瓜达尔港、能源、交通基础设施和产业合作为重点,形成“1+4”经济合作布局。双方欢迎中巴经济走廊联委会第四次会议成功举行,同意尽快完成《中巴经济走廊远景规划》。", "http://news.163.com/15/0909/14/B332O90E0001124J.html", Calendar.getInstance().getTime(), "中国青年网", "匿名"); 313 | Article article2 = new Article(5,"中央党校举行秋季学期开学典礼 刘云山出席讲话","新华网北京9月7日电 中共中央党校7日上午举行2015年秋季学期开学典礼。中共中央政治局常委、中央党校校长刘云山出席并讲话,就深入学习贯彻习近平总书记系列重要讲话精神、坚持党校姓党提出要求。" + 314 | "刘云山指出,党校姓党是党校工作的根本原则。坚持党校姓党,重要的是坚持坚定正确的政治方向、贯彻实事求是的思想路线、落实从严治校的基本方针。要把党的基本理论教育和党性党风教育作为主课,深化中国特色社会主义理论体系学习教育,深化对习近平总书记系列重要讲话精神的学习教育,深化党章和党纪党规的学习教育。要坚持实事求是的思想方法和工作方法,弘扬理论联系实际的学风,提高教学和科研工作的针对性实效性。要严明制度、严肃纪律,把从严治校要求体现到党校工作和学员管理各方面,使党校成为不正之风的“净化器”。" + 315 | "刘云山指出,坚持党校姓党,既是对党校教职工的要求,也是对党校学员的要求。每一位学员都要强化党的意识,保持对党忠诚的政治品格,忠诚于党的信仰,坚定道路自信、理论自信、制度自信;忠诚于党的宗旨,牢记为了人民是天职、服务人民是本职,自觉践行党的群众路线;忠诚于党的事业,勤政敬业、先之劳之、敢于担当,保持干事创业的进取心和精气神。要强化党的纪律规矩意识,经常看一看党的政治准则、组织原则执行得怎么样,看一看党的路线方针政策落实得怎么样,看一看重大事项请示报告制度贯彻得怎么样,找差距、明不足,做政治上的明白人、遵规守纪的老实人。" + 316 | "刘云山强调,领导干部来党校学习,就要自觉接受党的优良作风的洗礼,修好作风建设这门大课。要重温党的光荣传统,学习革命先辈、英雄模范和优秀典型的先进事迹和崇高风范,自觉践行社会主义核心价值观,永葆共产党人的先进性纯洁性,以人格力量传递作风建设正能量。要认真落实党中央关于从严治党、改进作风的一系列要求,贯彻从严精神、突出问题导向,把自己摆进去、把职责摆进去,推动思想问题和实际问题一起解决,履行好党和人民赋予的职责使命。" + 317 | "赵乐际出席开学典礼。" + 318 | "中央有关部门负责同志,中央党校校委成员、全体学员和教职工在主会场参加开学典礼。中国浦东、井冈山、延安干部学院全体学员和教职工在分会场参加开学典礼。", "http://news.163.com/15/0907/20/B2UGF9860001124J.html", Calendar.getInstance().getTime(), "新华网", "NN053"); 319 | Article 
article3 = new Article(6,"俞正声率中央代表团赴大昭寺看望宗教界人士","国际在线报道:7号上午,赴西藏出席自治区50周年庆祝活动的中共中央政治局常委、全国政协主席俞正声与中央代表团主要成员赴大昭寺慰问宗教界爱国人士。俞正声向大昭寺赠送了习近平总书记题写的“加强民族团结,建设美丽西藏”贺幛,珐琅彩平安瓶,并向僧人发放布施,他在会见僧众时表示,希望藏传佛教坚持爱国爱教传统。" + 320 | "至今已有1300多年历史的大昭寺,在藏传佛教中拥有至高无上的地位。西藏的寺院归属于某一藏传佛教教派,大昭寺则是各教派共尊的神圣寺院。一年四季不论雨雪风霜,大昭寺外都有从四面八方赶来磕长头拜谒的虔诚信众。" + 321 | "7号上午,赴西藏出席自治区50周年庆祝活动的中共中央政治局常委、全国政协主席俞正声与中央代表团主要成员专门到大昭寺看望僧众,“我代表党中央、国务院和习主席向大家问好。藏传佛教有许多爱国爱教的传统,有许多高僧大德维护祖国统一和民族团结,坚持传播爱国爱教的正信。党和政府对此一贯给予充分肯定,并对藏传佛教的发展给予了支持。”" + 322 | "俞正声回忆这已是他自1995年以来第三次来大昭寺。他表示,过去二十年发生了巨大的变化,而藏传佛教的发展与祖国的发展、西藏的发展息息相关。藏传佛教要更好发展必须与社会主义社会相适应。他也向僧人们提出期望:“佛教既是信仰,也是一种文化和学问,希望大家不断提高自己对佛教的认识和理解,提高自己的水平。希望大家更好地管理好大昭寺,搞好民主管理、科学管理,使我们的管理更加规范。”" + 323 | "今天是中央代表团抵达拉萨的第二天,当天安排有多项与自治区成立五十周年相关活动。继上午接见自治区领导成员、离退休老同志、各族各界群众代表宗教界爱国人士,参观自治区50年成就展外,中央代表团下午还慰问了解放军驻拉萨部队、武警总队等。当天晚些时候,庆祝西藏自治区成立50周年招待会、文艺晚会将在拉萨举行。(来源:环球资讯)", "http://news.163.com/15/0907/16/B2U3O30R00014JB5.html", Calendar.getInstance().getTime(), "国际在线", "全宇虹"); 324 | Article article4 = new Article(7,"张德江:发挥人大主导作用 加快完备法律规范体系","新华网广州9月7日电 中共中央政治局常委、全国人大常委会委员长张德江9月6日至7日在广东出席第21次全国地方立法研讨会,并在佛山市就地方人大工作进行调研。他强调,要全面贯彻落实党的十八大和十八届三中、四中全会精神,深入学习贯彻习近平总书记系列重要讲话精神,认真实施立法法,充分发挥人大及其常委会在立法工作中的主导作用,抓住提高立法质量这个关键,加快形成完备的法律规范体系,为协调推进“四个全面”战略布局提供法治保障。" + 325 | "张德江指出,立法权是人大及其常委会的重要职权。做好新形势下立法工作,要坚持党的领导,贯彻党的路线方针政策和中央重大决策部署,切实增强思想自觉和行动自觉。要牢固树立依法立法、为民立法、科学立法理念,尊重改革发展客观规律和法治建设内在规律,加强重点领域立法,做到立法主动适应改革和经济社会发展需要。充分发挥在立法工作中的主导作用,把握立项、起草、审议等关键环节,科学确定立法项目,协调做好立法工作,研究解决立法中的重点难点问题,建立健全立法工作格局,形成立法工作合力。" + 326 | "张德江说,地方性法规是我国法律体系的重要组成部分。地方立法关键是在本地特色上下功夫、在有效管用上做文章。当前要落实好立法法的规定,扎实推进赋予设区的市地方立法权工作,明确步骤和时间,做好各项准备工作,标准不能降低,底线不能突破,坚持“成熟一个、确定一个”,确保立法质量。" + 327 | "张德江强调,加强和改进立法工作,要有高素质的立法工作队伍作为保障。要把思想政治建设摆在首位,全面提升专业素质能力,充实力量,培养人才,努力造就一支忠于党、忠于国家、忠于人民、忠于法律的立法工作队伍。" + 328 | 
"张德江一直非常关注地方人大特别是基层人大工作。在粤期间,他来到佛山市人大常委会,详细询问立法、监督等工作情况,希望他们与时俱进、开拓创新,切实担负起宪法法律赋予的职责。他走进南海区人大常委会、顺德区乐从镇人大主席团,同基层人大代表和人大工作者亲切交谈,肯定基层人大代表联系群众的有益做法,强调人大代表不能脱离人民群众,必须把人民利益放在心中,时刻为群众着想,听取群众意见,反映群众意愿,帮助群众解决实际问题。张德江指出,县乡人大在基层治理体系和治理能力建设中具有重要作用。要贯彻落实中央关于加强县乡人大工作和建设的精神,认真实施新修改的地方组织法、选举法、代表法,不断提高基层人大工作水平,推动人大工作迈上新台阶。" + 329 | "中共中央政治局委员、广东省委书记胡春华参加上述活动。", "http://news.163.com/15/0907/20/B2UGEUTJ00014PRF.html", Calendar.getInstance().getTime(), "新华网", "陈菲"); 330 | 331 | Bulk bulk = new Bulk.Builder() 332 | .defaultIndex("article") 333 | .defaultType("article") 334 | .addAction(Arrays.asList( 335 | new Index.Builder(article1).build(), 336 | new Index.Builder(article2).build(), 337 | new Index.Builder(article3).build(), 338 | new Index.Builder(article4).build() 339 | )).build(); 340 | jestClient.execute(bulk); 341 | } 342 | 343 | 344 | private static void createIndex() throws Exception { 345 | JestClient jestClient = JestExample.getJestClient(); 346 | Article article1 = new Article(1,"高圆圆身上淤青 遭家暴还是玩SM遭性虐?","近日,有媒体拍到高圆圆身上的淤青,腿上还有两块伤疤,引起不少人猜测是遭受家暴。" + 347 | "对于遭到家暴的传闻,高圆圆首次作出澄清,称这是因为照顾母亲而留下的伤痕,她跟赵又廷关系好得很。" + 348 | "照顾母亲竟然会留下伤痕?究竟是怎么照顾的。" + 349 | "高圆圆称,“我妈当时住院,她翻身是需要旁人帮助的,我要到床头去,抱起她的上臂,然后她的脚一蹬,这样才能翻过来。" + 350 | "但我们两个的力气都不够,每次一用力的时候,我的大腿就会刚好撞在那个床框上,所以大腿上就撞出那两块淤青了。" + 351 | "事情真的这么简单吗?即使稍微一撞,也不至于淤青吧!" 
+ 352 | "看到那个伤疤以及淤青的皮肤,不得不让人怀疑高圆圆是遭受家暴。" + 353 | "当然,还有另外一个原因,就是玩SM遭性虐。" + 354 | "当然,这么变态的事情,相信女神不会做的。" + 355 | "是照顾母亲留下的伤痕也好,遭受家暴也好,希望女神高圆圆以后都能平平安安健健康康吧!", "http://www.vdfly.com/star/20141119/37968.html", Calendar.getInstance().getTime(), "青春娱乐网", "匿名"); 356 | Article article2 = new Article(2,"习近平:希望新西兰保障输华产品质量安全","新华网惠灵顿11月20日电(记者陈贽 田帆刘华)国家主席习近平20日在惠灵顿同新西兰总理约翰·基举行会谈,双方决定,将中新关系提升为全面战略伙伴关系,共建中新两国利益共同体,推动两国合作不断迈上新台阶。" + 357 | "习近平指出,中新两国相互理解、相互包容、平等相待,建立了高水平政治互信,开展了宽领域互利合作,两国人民都支持发展中新关系。" + 358 | "两国决定建立全面战略伙伴关系,为中新关系发展指明了大方向。" + 359 | "习近平强调,两国要保持高层交往,构筑多层次多渠道交流合作格局。" + 360 | "中新自由贸易协定是中国同发达国家之间达成的第一个自由贸易协定。双方要采取积极举措,推动两国经贸关系不断迈上新台阶,力争早日实现2020年双边贸易额达到300亿新元的新目标。" + 361 | "双方要继续巩固和提升农牧业等传统领域合作。中国有13亿多人口,市场大得很。新西兰乳制品、羊毛、牛羊肉、海产品等优质产品在中国很受欢迎。" + 362 | "希望新方切实保障输华产品质量安全,保障中国消费者权益。金融服务、信息技术、节能环保、生物医药等是中国重点发展的产业,新西兰有竞争力,双方可以开展更多合作。" + 363 | "习近平强调,前不久,中国同其他各方一道,推动亚太经合组织第二十二次领导人非正式会议启动了亚太自由贸易区进程。中新都是亚太经合组织成员,也都是区域全面经济伙伴关系协定谈判方,双方可以在这些机制中加强协调合作,通过打造惠及各方的地区自由贸易安排、建设好亚洲基础设施投资银行,推进亚太经济一体化进程。" + 364 | "南太平洋地区也是中方提出的21世纪海上丝绸之路的自然延伸,我们欢迎新方参与进来,使中新经贸合作取得更大发展。习近平强调,中新两国要加强人文交流,增进相互了解和友谊,中方将在新西兰设立中国文化中心。" + 365 | "中新签署电视合拍协议是中国政府同外国政府签署的首个电视合拍协议。双方要加强防务、执法交流,在打击腐败、追逃追赃等方面开展合作。" + 366 | "中方愿意同新方在南极、太平洋岛国事务上加强合作。约翰·基表示,建交42年来,新中关系取得长足进展,合作不断深化和扩大,彼此成为好伙伴。" + 367 | "新方致力于同中方一道建设好两国全面战略伙伴关系,在中方核心利益和重大关切问题上,新方将继续支持中方。新中签署自由贸易协定6年来,两国合作取得丰硕成果,新形势下,要提高双边贸易投资水平。" + 368 | "新方希望扩大新西兰农产品、乳制品对华出口,欢迎中国企业前来投资。新方重视亚洲基础设施投资银行的作用,将积极参与银行建设。新方将简化签证手续,欢迎更多中国公民前来旅游、留学。" + 369 | "中方积极节能减排,为国际社会合作应对气候变化树立了典范,新方希望同中方加强合作。我祝贺中方成功举办了亚太经合组织领导人非正式会议,愿意同中方一道,推动区域一体化进程,促进亚太地区和平与繁荣。" + 370 | "会谈后,双方发表《中国和新西兰关于建立全面战略伙伴关系的联合声明》。习近平和约翰·基共同见证了多项双边合作文件的签署,涉及气候变化、电视、教育、南极、金融、旅游、食品安全等领域。两国领导人还共同会见记者。" + 371 | "习近平强调,中新关系具有开创性、示范性意义。中新建立了全面战略伙伴关系,为两国关系规划了宏伟蓝图。中新两国签署一系列合作协议,充分展示了两国务实合作的广度和深度。" + 372 | "中国人说:“兄弟同心,其利断金。”毛利族谚语说:“你我篮子在一起,大家生活更美好。”让我们携手合作,谱写中新关系发展新篇章,更好造福两国人民。约翰·基表示,新西兰钦佩中国的非凡成就,相信中国的发展不仅造福中国人民,也给包括新西兰在内的各国带来巨大机遇。" + 373 | "我们愿同中方共同推动两国全面战略伙伴关系发展。王沪宁、栗战书、杨洁篪等参加上述活动。", 
"http://news.163.com/14/1120/15/ABGM1POL00014JB5.html", Calendar.getInstance().getTime(), "新华网", "NN053"); 374 | Article article3 = new Article(3,"周末陕西将迎雨雪天气 关中近日大部雾霾缭绕","本报讯(首席记者 姬娜)前天,我省47个县城出现雾霾。昨天,关中大部分地区依旧雾霾缭绕,能见度差。预计22日-24日有雨雪天气,届时雾霾或将能得到缓解。对于这样的天气,微信上不再如之前有那么多埋怨牢骚,更多的是无奈。" + 375 | "刘先生说:“现在有雾霾是正常的,天气晴好反倒是不正常的,我已经习惯了。”网友“舟子”调侃称:“与其埋怨世界不如改变自己,多多欣赏一下这霾天气,那是多么美妙的一种朦胧感,让我想起了初恋。”" + 376 | "省气象台专家称,昨天,最低气温略有回升,陕北大部在-4-4℃,关中大部在0-8℃,陕南大部4-9℃,全省最低温出现在吴起, -3.8℃。有了雾霾和云层的包 围,西安温度在6-17℃之间。" + 377 | "预计本月下旬全省以阴天、多云为主,22-24日有降水天气。今天:陕北、关中多云间晴天,陕南多云转阴天;21日:全省多云;22日:陕北多云间阴天,关中、陕南多云转阴天,部分地方有小雨;23日:全省阴天,关中部分、陕南部分地方有小雨;24日:全省阴天,陕北部分地方有雨夹雪,关中、陕南有小雨。西安今天晴转多云,3-15℃;明天多云,后天阴天。据省气象局消息:昨天我省晴间多云,无明显冷空气活动,早晨大部分地方出现轻雾,11时后泾河、周至、长安、户县、渭南、蒲城、韩城、华县、咸阳、彬县、武功等地出现霾。" + 378 | "受雾霾影响,关中地区除宝鸡外,西安、咸阳、渭南、铜川空气质量为中度污染。今晨0℃气温线主要位于延安南部,07时气温具体分布:陕北大部-4~2℃(最低吴起-4.1℃),关中大部、商洛2~6℃(西安泾河5.8℃),汉中、安康5~10℃。渭南市气象台昨天08时发布霾黄色预警信号:预计未来24小时内渭南大部地区将出现中度霾,局地有重度霾,易形成中度到重度空气污染。今明两天大部地方多云 22-24日有降水过程今天白天:陕北、关中多云间晴天,陕南多云间阴天。早晨关中、陕南部分地方有雾或霾。今天晚上:陕北、关中多云间晴天,陕南多云间阴天。" + 379 | "21日:陕北、关中多云,陕南多云转阴天,南部局地有小雨。22日:陕北、关中多云转阴天,陕南阴天,关中南部和陕南大部有小雨。23日:全省阴天,关中部分有小雨,陕南有小到中雨。24日:全省阴天,陕北部分地方有雨夹雪,关中、陕南有小雨。25日:全省阴天转多云。26日:全省多云。后期天气趋势(27-29日):我省以阴到多云为主,中南部仍有阴雨天气。", "http://xian.qq.com/a/20141120/011300.htm", Calendar.getInstance().getTime(), "三秦都市报 - 三秦网", "姬娜"); 380 | Index index1 = new Index.Builder(article1).index("article").type("article").build(); 381 | Index index2 = new Index.Builder(article2).index("article").type("article").build(); 382 | Index index3 = new Index.Builder(article3).index("article").type("article").build(); 383 | JestResult jestResult1 = jestClient.execute(index1); 384 | System.out.println(jestResult1.getJsonString()); 385 | JestResult jestResult2 = jestClient.execute(index2); 386 | System.out.println(jestResult2.getJsonString()); 387 | JestResult jestResult3 = jestClient.execute(index3); 388 | System.out.println(jestResult3.getJsonString()); 389 | } 390 | 391 | 392 | private static JestClient getJestClient() { 393 | 
JestClientFactory factory = new JestClientFactory(); 394 | factory.setHttpClientConfig(new HttpClientConfig 395 | .Builder("http://127.0.0.1:9200") 396 | .gson(new GsonBuilder().setDateFormat("yyyy-MM-dd'T'HH:mm:ss").create()) 397 | .multiThreaded(true) 398 | .readTimeout(10000) 399 | .build()); 400 | JestClient client = factory.getObject(); 401 | return client; 402 | } 403 | 404 | } -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/controller/ArticleController.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.controller; 2 | 3 | import net.aimeizi.model.Article; 4 | import net.aimeizi.service.ArticleService; 5 | import org.apache.commons.lang3.StringUtils; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.stereotype.Controller; 8 | import org.springframework.web.bind.annotation.RequestMapping; 9 | import org.springframework.web.bind.annotation.RequestMethod; 10 | import org.springframework.web.servlet.ModelAndView; 11 | 12 | import javax.servlet.http.HttpServletRequest; 13 | import javax.servlet.http.HttpServletResponse; 14 | import java.util.List; 15 | import java.util.Map; 16 | 17 | @Controller 18 | public class ArticleController { 19 | 20 | @Autowired 21 | // @Resource 22 | private ArticleService articleService; 23 | 24 | @RequestMapping(value = "/", method = RequestMethod.GET) 25 | public String tosearch() { 26 | return "search"; 27 | } 28 | 29 | @RequestMapping(value = "/search", method = RequestMethod.POST) 30 | public ModelAndView search(HttpServletRequest request, HttpServletResponse response) { 31 | ModelAndView modelAndView = new ModelAndView(); 32 | modelAndView.setViewName("search"); 33 | String field = request.getParameter("field"); 34 | String queryString = request.getParameter("queryString"); 35 | String older = request.getParameter("older"); 36 | String pageNumber = 
request.getParameter("pageNumber"); 37 | String pageSize = request.getParameter("pageSize"); 38 | if (StringUtils.isEmpty(queryString)) { 39 | return modelAndView; 40 | } 41 | try { 42 | if (StringUtils.isEmpty(pageNumber) || StringUtils.isEmpty(pageSize)) { 43 | pageNumber = String.valueOf("1"); 44 | pageSize = String.valueOf("10"); 45 | } 46 | Map maps = articleService.search(field, queryString, older, Integer.parseInt(pageNumber), Integer.parseInt(pageSize)); 47 | modelAndView.addObject("queryString", queryString); 48 | modelAndView.addObject("articles", (List) maps.get("articles")); 49 | long count = (Long) maps.get("count"); 50 | modelAndView.addObject("count", count); 51 | modelAndView.addObject("took", (Long) maps.get("took")); 52 | modelAndView.addObject("field", field); 53 | modelAndView.addObject("older", older); 54 | modelAndView.addObject("pageNumber", pageNumber); 55 | modelAndView.addObject("pageSize", pageSize); 56 | modelAndView.addObject("totalPages", count % Integer.parseInt(pageSize) == 0 ? count / Integer.parseInt(pageSize) : count / Integer.parseInt(pageSize) + 1); 57 | } catch (Exception e) { 58 | e.printStackTrace(); 59 | } 60 | return modelAndView; 61 | } 62 | 63 | } -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/dao/ArticleDao.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.dao; 2 | 3 | import net.aimeizi.model.Article; 4 | 5 | /** 6 | * Created by Administrator on 2015/9/10. 
7 | */ 8 | public interface ArticleDao { 9 | void save(Article article); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/dao/impl/ArticleDaoImpl.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.dao.impl; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.jdbc.core.JdbcTemplate; 7 | import org.springframework.jdbc.core.PreparedStatementSetter; 8 | import org.springframework.stereotype.Repository; 9 | 10 | import javax.sql.DataSource; 11 | import java.sql.Date; 12 | import java.sql.PreparedStatement; 13 | import java.sql.SQLException; 14 |
/**
 * JDBC implementation of {@link ArticleDao} that inserts articles into the {@code news} table.
 */
@Repository
public class ArticleDaoImpl implements ArticleDao {

    private static final String INSERT_SQL =
            "INSERT INTO news (title,content,url,source,author,pubdate) VALUES (?,?,?,?,?,?)";

    private JdbcTemplate jdbcTemplate;

    /**
     * Builds the {@link JdbcTemplate} used by this DAO from the injected data source.
     *
     * @param dataSource the data source to run statements against
     */
    @Autowired
    public void setDataSource(DataSource dataSource) {
        this.jdbcTemplate = new JdbcTemplate(dataSource);
    }

    /**
     * Inserts one article row.
     *
     * <p>The publication date is bound as a timestamp so the time of day is kept;
     * an article without a publication date is stored with a NULL {@code pubdate}.
     *
     * @param article the article to persist
     */
    public void save(final Article article) {
        jdbcTemplate.update(INSERT_SQL, new PreparedStatementSetter() {
            public void setValues(PreparedStatement ps) throws SQLException {
                ps.setString(1, article.getTitle());
                ps.setString(2, article.getContent());
                ps.setString(3, article.getUrl());
                ps.setString(4, article.getSource());
                ps.setString(5, article.getAuthor());
                // java.sql.Date carries only the calendar date; Timestamp keeps hours and minutes.
                java.util.Date pubdate = article.getPubdate();
                if (pubdate == null) {
                    ps.setNull(6, java.sql.Types.TIMESTAMP);
                } else {
                    ps.setTimestamp(6, new java.sql.Timestamp(pubdate.getTime()));
                }
            }
        });
    }
}
42 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/model/Article.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.model; 2 | 3 | import
io.searchbox.annotations.JestId; 4 | 5 | import java.util.Date; 6 |
/**
 * A news article as it is stored in the database and in the search index.
 *
 * <p>Plain mutable bean: every property has a getter and a setter and no validation is applied.
 */
public class Article {

    /** Document identifier; the field Jest treats as the Elasticsearch id. */
    @JestId
    private int id;
    private String title;
    private String content;
    private String url;
    private Date pubdate;
    private String source;
    private String author;

    /** Creates an article with all properties left at their defaults. */
    public Article() {
    }

    /**
     * Creates a fully populated article.
     *
     * @param id      document identifier
     * @param title   headline
     * @param content body text
     * @param url     address the article was fetched from
     * @param pubdate publication date
     * @param source  publishing outlet
     * @param author  author or editor
     */
    public Article(int id, String title, String content, String url,
                   Date pubdate, String source, String author) {
        this.id = id;
        this.title = title;
        this.content = content;
        this.url = url;
        this.pubdate = pubdate;
        this.source = source;
        this.author = author;
    }

    /** @return the document identifier */
    public int getId() {
        return this.id;
    }

    /** @param id the document identifier */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the headline */
    public String getTitle() {
        return this.title;
    }

    /** @param title the headline */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the body text */
    public String getContent() {
        return this.content;
    }

    /** @param content the body text */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the address the article was fetched from */
    public String getUrl() {
        return this.url;
    }

    /** @param url the address the article was fetched from */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the publication date (the stored instance, not a copy) */
    public Date getPubdate() {
        return this.pubdate;
    }

    /** @param pubdate the publication date (stored as given, not copied) */
    public void setPubdate(Date pubdate) {
        this.pubdate = pubdate;
    }

    /** @return the publishing outlet */
    public String getSource() {
        return this.source;
    }

    /** @param source the publishing outlet */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return the author or editor */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the author or editor */
    public void setAuthor(String author) {
        this.author = author;
    }

}
90 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/service/ArticleService.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.service; 2 | 3 | import net.aimeizi.model.Article; 4 | 5 | import java.util.List; 6 | import
java.util.Map; 7 | 8 |
/**
 * Full-text search over the indexed articles.
 */
public interface ArticleService {

    /**
     * Searches the article index.
     *
     * @param field       the field to search, or {@code "all"} to search every field
     * @param queryString the text to search for
     * @param older       requested sort direction ({@code "asc"} or anything else for descending)
     * @param pageNumber  1-based page to return
     * @param pageSize    number of articles per page
     * @return a map with the keys {@code "articles"} (the page of matching articles),
     *         {@code "count"} (total number of matches, as a {@code Long}) and
     *         {@code "took"} (query time reported by the search engine, as a {@code Long})
     * @throws Exception if the search cannot be executed
     */
    Map<String, Object> search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception;

}
25 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/service/impl/ArticleServiceImpl.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.service.impl; 2 | 3 | import com.google.gson.GsonBuilder; 4 | import com.google.gson.JsonArray; 5 | import com.google.gson.JsonObject; 6 | import io.searchbox.client.JestClient; 7 | import io.searchbox.client.JestClientFactory; 8 | import io.searchbox.client.config.HttpClientConfig; 9 | import io.searchbox.core.Search; 10 | import io.searchbox.core.SearchResult; 11 | import net.aimeizi.model.Article; 12 | import net.aimeizi.service.ArticleService; 13 | import org.elasticsearch.index.query.QueryBuilders; 14 | import org.elasticsearch.search.builder.SearchSourceBuilder; 15 | import org.elasticsearch.search.highlight.HighlightBuilder; 16 | import org.springframework.stereotype.Service; 17 | 18 | import java.text.ParseException; 19 | import java.text.SimpleDateFormat; 20 | import java.util.ArrayList; 21 | import java.util.HashMap; 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | /** 26 | * Created by Administrator on 2015/9/21.
27 | */
/**
 * {@link ArticleService} backed by Elasticsearch, queried over HTTP through Jest.
 *
 * <p>Searches the {@code news} index, requests highlighting for title, content, author and
 * source, and replaces those article properties with the first highlighted fragment when
 * one is returned.
 */
@Service
public class ArticleServiceImpl implements ArticleService {

    /** Date format used both by the Jest Gson mapper and by the manual result parser. */
    private static final String DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";

    /**
     * Single client shared by every search. Building a client per request, as was done
     * before, creates a new HTTP connection pool each time and never releases it.
     */
    private static final JestClient JEST_CLIENT = createJestClient();

    /** Builds the Jest client pointing at the local Elasticsearch node. */
    private static JestClient createJestClient() {
        JestClientFactory factory = new JestClientFactory();
        factory.setHttpClientConfig(new HttpClientConfig
                .Builder("http://127.0.0.1:9200")
                .gson(new GsonBuilder().setDateFormat(DATE_PATTERN).create())
                .multiThreaded(true)
                .readTimeout(10000)
                .build());
        return factory.getObject();
    }

    /** @return the shared Jest client */
    private static JestClient getJestClient() {
        return JEST_CLIENT;
    }

    /**
     * Searches the {@code news} index.
     *
     * @param field       field for an exact term query; {@code "all"}, {@code null} or empty
     *                    runs a query-string query across fields instead
     * @param queryString the text to search for
     * @param older       requested sort direction; currently not applied (sorting is disabled)
     * @param pageNumber  1-based page number; values below 1 are treated as 1
     * @param pageSize    hits per page; negative values are treated as 0
     * @return map with {@code "articles"} (list of {@link Article}), {@code "count"} (total hits)
     *         and {@code "took"} (milliseconds reported by Elasticsearch)
     * @throws Exception if the request cannot be executed or Elasticsearch reports a failure
     */
    public Map<String, Object> search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

        // Build the query.
        if (field == null || field.isEmpty() || "all".equals(field)) {
            searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString));
        } else {
            searchSourceBuilder.query(QueryBuilders.termQuery(field, queryString));
        }

        // Highlighted fields.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        highlightBuilder.field("content");
        highlightBuilder.field("author");
        highlightBuilder.field("source");
        // NOTE(review): both tag strings are empty here; they look like markup that was lost
        // when this file was extracted - confirm the intended highlight tags.
        highlightBuilder.preTags("").postTags("");
        highlightBuilder.fragmentSize(200);
        searchSourceBuilder.highlight(highlightBuilder);

        // Paging: never send a negative offset or size to Elasticsearch.
        int page = Math.max(pageNumber, 1);
        int size = Math.max(pageSize, 0);
        searchSourceBuilder.from((page - 1) * size);
        searchSourceBuilder.size(size);

        Search search = new Search.Builder(searchSourceBuilder.toString())
                .addIndex("news")
                .build();
        SearchResult result = getJestClient().execute(search);
        if (!result.isSucceeded()) {
            // Fail with the server's message instead of a NullPointerException further down.
            throw new IllegalStateException("Elasticsearch search failed: " + result.getErrorMessage());
        }

        JsonObject jsonObject = result.getJsonObject();
        long took = jsonObject.get("took").getAsLong();
        long total = jsonObject.getAsJsonObject("hits").get("total").getAsLong();

        List<Article> articles = new ArrayList<Article>();
        for (SearchResult.Hit<Article, Void> hit : result.getHits(Article.class)) {
            Article article = hit.source;
            if (article == null) {
                continue;
            }
            // A hit carries no highlight section when none of the requested fields matched.
            Map<String, List<String>> highlight = hit.highlight;
            if (highlight != null) {
                String title = firstFragment(highlight.get("title"));
                if (title != null) {
                    article.setTitle(title);
                }
                String content = firstFragment(highlight.get("content"));
                if (content != null) {
                    article.setContent(content);
                }
                String author = firstFragment(highlight.get("author"));
                if (author != null) {
                    article.setAuthor(author);
                }
                String source = firstFragment(highlight.get("source"));
                if (source != null) {
                    article.setSource(source);
                }
            }
            articles.add(article);
        }

        Map<String, Object> maps = new HashMap<String, Object>();
        maps.put("articles", articles);
        maps.put("count", total);
        maps.put("took", took);
        return maps;
    }

    /** @return the first highlighted fragment, or {@code null} when there is none */
    private static String firstFragment(List<String> fragments) {
        return fragments == null || fragments.isEmpty() ? null : fragments.get(0);
    }

    /**
     * Parses the raw JSON response by hand; the alternative to Jest's object mapping.
     *
     * @param articles list the parsed articles are appended to
     * @param result   the search result to read
     * @throws ParseException if a {@code pubdate} value does not match {@value #DATE_PATTERN}
     */
    private void parseSearchResult(List<Article> articles, SearchResult result) throws ParseException {
        JsonArray jsonArray = result.getJsonObject().getAsJsonObject("hits").getAsJsonArray("hits");
        // SimpleDateFormat is not thread-safe, so it stays a local; one per call is enough.
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_PATTERN);

        for (int i = 0; i < jsonArray.size(); i++) {
            JsonObject jsonHitsObject = jsonArray.get(i).getAsJsonObject();
            JsonObject sourceObject = jsonHitsObject.get("_source").getAsJsonObject();

            Article article = new Article();
            article.setTitle(sourceObject.get("title").getAsString());
            article.setContent(sourceObject.get("content").getAsString());
            article.setSource(sourceObject.get("source").getAsString());
            article.setAuthor(sourceObject.get("author").getAsString());
            article.setPubdate(sdf.parse(sourceObject.get("pubdate").getAsString()));
            article.setUrl(sourceObject.get("url").getAsString());

            // The highlight member is absent when nothing was highlighted for this hit.
            if (jsonHitsObject.get("highlight") != null) {
                JsonObject highlightObject = jsonHitsObject.get("highlight").getAsJsonObject();
                String content = firstFragment(highlightObject, "content");
                if (content != null) {
                    article.setContent(content);
                }
                String title = firstFragment(highlightObject, "title");
                if (title != null) {
                    article.setTitle(title);
                }
                String source = firstFragment(highlightObject, "source");
                if (source != null) {
                    article.setSource(source);
                }
                String author = firstFragment(highlightObject, "author");
                if (author != null) {
                    article.setAuthor(author);
                }
            }
            articles.add(article);
        }
    }

    /** @return the first highlighted fragment of {@code field}, or {@code null} when absent */
    private static String firstFragment(JsonObject highlightObject, String field) {
        if (highlightObject.get(field) == null) {
            return null;
        }
        JsonArray fragments = highlightObject.get(field).getAsJsonArray();
        return fragments.size() == 0 ? null : fragments.get(0).getAsString();
    }
}
194 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/GovNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * 爬取中央政府网要闻、热点、部门新闻、地方报道、执法监管等新闻信息 13 | * Created by Administrator on 2015/9/10.
14 | */
/**
 * WebMagic page processor for the gov.cn news columns.
 *
 * <p>List pages only contribute further links. Every other page is treated as an article:
 * its fields are extracted, stored in the page's result items and sent to the search index.
 */
public class GovNewsPageProcesser implements PageProcessor {

    // News list pages.
    private final static String URL_LIST = "http://new\\.sousuo\\.gov\\.cn\\/column\\/\\d+\\/\\d+\\.htm";

    // Article pages linked from a list page.
    private final static String URL_POST = "http://www\\.gov\\.cn/[\\w/-]+\\.htm";

    private Site site = Site.me()
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36");

    /**
     * Handles one downloaded page.
     *
     * @param page the downloaded page
     */
    public void process(Page page) {
        if (page.getUrl().regex(URL_LIST).match()) {
            // List page: queue the linked articles and the other list pages.
            page.addTargetRequests(page.getHtml().links().regex(URL_POST).all());
            page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
            return;
        }

        // Article page.
        String title = Utils.replaceHTML(select(page, "//div[@class='pages-title']"));
        String content = Utils.replaceHTML(select(page, "//div[@class='article-colum']/div[@class='pages_content']/table[@id='printContent']/tbody/tr/td"));
        String source = Utils.replaceHTML(dropLabel(select(page, "//div[@class='article-colum']/div[@class='pages-date']/span[@class='font'][2]"), "来源: "));
        String author = Utils.replaceHTML(dropLabel(select(page, "//div[@class='article-colum']/div[@class='pages_content']/div[@class='editor']"), "责任编辑: "));
        String create = Utils.replaceHTML(select(page, "//div[@class='article-colum']/div[@class='pages-date']"));
        String url = page.getUrl().get();

        page.putField("title", title);
        page.putField("content", content);
        page.putField("source", source);
        page.putField("author", author);
        page.putField("create", create);
        page.putField("url", url);

        Utils.index(Utils.createArticle(title, content, source, author, url, create));
    }

    /** @return the text selected by {@code xpath}; whatever the selector yields, possibly {@code null} */
    private static String select(Page page, String xpath) {
        return page.getHtml().xpath(xpath).toString();
    }

    /**
     * Removes a leading label such as "来源: " from an extracted value.
     * A missing value stays {@code null} instead of throwing, so pages without a
     * source or editor element are still processed.
     */
    private static String dropLabel(String value, String label) {
        return value == null ? null : value.replace(label, "");
    }

    /** @return the crawl settings for this site */
    public Site getSite() {
        return site;
    }

    /**
     * Starts the crawl from the list pages of every news column, storing results through
     * the {@code jdbcPipeline} bean defined in {@code applicationContext.xml}.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml");
        JdbcPipeline jdbcPipeline = (JdbcPipeline) applicationContext.getBean("jdbcPipeline");
        Spider.create(new GovNewsPageProcesser())
                .addUrl("http://new.sousuo.gov.cn/column/19769/0.htm") // top news
                .addUrl("http://new.sousuo.gov.cn/column/16704/0.htm") // hot topics
                .addUrl("http://new.sousuo.gov.cn/column/16700/0.htm") // ministry news
                .addUrl("http://new.sousuo.gov.cn/column/16699/0.htm") // local reports
                .addUrl("http://new.sousuo.gov.cn/column/16697/0.htm") // law enforcement and regulation
                .addUrl("http://new.sousuo.gov.cn/column/19423/0.htm") // State Council information
                .addUrl("http://new.sousuo.gov.cn/column/16622/0.htm") // speeches
                .addUrl("http://new.sousuo.gov.cn/column/16623/0.htm") // meetings
                .addUrl("http://new.sousuo.gov.cn/column/16621/0.htm") // activities
                .addUrl("http://new.sousuo.gov.cn/column/16620/0.htm") // visits abroad
                .addUrl("http://new.sousuo.gov.cn/column/16740/0.htm") // special topics - latest
                .addUrl("http://new.sousuo.gov.cn/column/16739/0.htm") // special topics - focus
                .addUrl("http://new.sousuo.gov.cn/column/16743/0.htm") // incidents
                .addUrl("http://new.sousuo.gov.cn/column/16744/0.htm") // contingency plans
                .addUrl("http://new.sousuo.gov.cn/column/16742/0.htm") // work
                .addUrl("http://new.sousuo.gov.cn/column/16765/0.htm") // policy interpretation - experts
                .addUrl("http://new.sousuo.gov.cn/column/16764/0.htm") // policy interpretation - media
                .addUrl("http://new.sousuo.gov.cn/column/17999/0.htm") // commentary - editorials
                .addUrl("http://new.sousuo.gov.cn/column/18000/0.htm") // commentary - current affairs
                .addUrl("http://new.sousuo.gov.cn/column/18001/0.htm") // commentary - online
                .addUrl("http://new.sousuo.gov.cn/column/16852/0.htm") // data news
                .addPipeline(jdbcPipeline) // store the crawled results in the database
                .thread(5)
                .run();
    }
}
104 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/JdbcPipeline.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import java.text.ParseException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | /** 19 | * Created by Administrator on 2015/9/10.
20 | */
/**
 * WebMagic pipeline that turns the extracted page fields into an {@link Article}
 * and saves it through {@link ArticleDao}.
 */
@Component("jdbcPipeline")
public class JdbcPipeline implements Pipeline {

    /** Locates a {@code yyyy-MM-dd HH:mm} timestamp inside the extracted date text. */
    private static final Pattern PUBLISH_TIME = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

    @Autowired
    ArticleDao articleDao;

    /**
     * Saves the article described by {@code resultItems}. Pages that produced no
     * fields (for example list pages) are skipped.
     *
     * @param resultItems the fields extracted from one page
     * @param task        the running crawl task (unused)
     */
    public void process(ResultItems resultItems, Task task) {
        // Check for null before touching the object, not after.
        if (resultItems == null) {
            return;
        }
        Map<String, Object> items = resultItems.getAll();
        if (items == null || items.isEmpty()) {
            return;
        }

        Article article = new Article();
        article.setTitle((String) items.get("title"));
        article.setContent((String) items.get("content"));
        article.setSource((String) items.get("source"));
        article.setAuthor((String) items.get("author"));
        article.setUrl((String) items.get("url"));
        article.setPubdate(parsePublishTime((String) items.get("create")));
        articleDao.save(article);
    }

    /**
     * Extracts the publication time from the raw date text.
     *
     * @param raw the extracted text, possibly {@code null}
     * @return the parsed date, or {@code null} when the text is missing or unparseable
     */
    private static Date parsePublishTime(String raw) {
        if (raw == null) {
            return null;
        }
        Matcher matcher = PUBLISH_TIME.matcher(raw);
        // Without a match the whole text is handed to the parser, as before.
        String candidate = matcher.find() ? matcher.group(0) : raw;
        try {
            return new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(candidate);
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }
}
51 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/NeteaseNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/9 0009.
13 | */
/**
 * WebMagic page processor for news.163.com.
 *
 * <p>Special/list pages and the two section front pages only feed the crawl queue;
 * any other page is extracted as an article and indexed.
 */
public class NeteaseNewsPageProcesser implements PageProcessor {

    private Site site = Site.me().setDomain("news.163.com")
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36");

    public static final String URL_LIST = "http://news\\.163\\.com/special/\\w+/\\w+\\.html";

    public static final String URL_POST = "http://news\\.163\\.com/.+\\.html";

    /**
     * Handles one downloaded page.
     *
     * @param page the downloaded page
     */
    public void process(Page page) {
        boolean listPage = page.getUrl().regex(URL_LIST).match()
                || page.getUrl().regex("http://news\\.163\\.com/domestic").match()
                || page.getUrl().regex("http://news\\.163\\.com/shehui").match();
        if (listPage) {
            // Queue article links first, then further list pages.
            page.addTargetRequests(page.getHtml().links().regex(URL_POST).all());
            page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
            return;
        }

        // Article page: extract each field and publish it under the same key as before.
        page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//h1[@id='h1title']").toString()));
        page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@id='endText']").toString()));
        page.putField("create", Utils.replaceHTML(page.getHtml().xpath("//div[@class=\"ep-time-soure cDGray\"]").toString()));
        page.putField("source", Utils.replaceHTML(page.getHtml().xpath("//a[@id=\"ne_article_source\"]/text()").toString()));
        page.putField("url", page.getUrl().get());

        String headline = (String) page.getResultItems().get("title");
        String body = (String) page.getResultItems().get("content");
        String published = (String) page.getResultItems().get("create");
        String outlet = (String) page.getResultItems().get("source");
        String address = (String) page.getResultItems().get("url");

        // No author is extracted for this site, so an empty one is passed.
        Article article = Utils.createArticle(headline, body, outlet, "", address, published);
        Utils.index(article);
    }

    /** @return the crawl settings for this site */
    public Site getSite() {
        return site;
    }

    /**
     * Starts the crawl from the domestic and society sections, storing results through
     * the {@code jdbcPipeline} bean defined in {@code applicationContext.xml}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml");
        JdbcPipeline jdbcPipeline = (JdbcPipeline) applicationContext.getBean("jdbcPipeline");
        Spider spider = Spider.create(new NeteaseNewsPageProcesser());
        spider.addUrl("http://news.163.com/domestic")
                .addUrl("http://news.163.com/shehui")
                .addPipeline(jdbcPipeline)
                .thread(5)
                .run();
    }
}
72 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/Utils.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import net.aimeizi.client.elasticsearch.TransportClient; 5 | import net.aimeizi.model.Article; 6 | 7 | import java.text.SimpleDateFormat; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/11.
13 | */
/**
 * Helpers shared by the page processors: building an {@link Article} from extracted text,
 * sending it to the search index, and stripping markup.
 */
public class Utils {

    /** Locates a {@code yyyy-MM-dd HH:mm} timestamp inside the extracted date text. */
    private static final Pattern PUBLISH_TIME = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

    /** ObjectMapper is thread-safe once configured, so one instance serves every crawler thread. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Builds an article from the extracted field values.
     *
     * @param title   headline
     * @param content body text
     * @param source  publishing outlet
     * @param author  author or editor
     * @param url     address of the page
     * @param create  text containing the publication time; the article's publication date
     *                stays {@code null} when this is missing or cannot be parsed
     * @return the populated article
     */
    public static Article createArticle(String title, String content, String source, String author, String url, String create) {
        Article article = new Article();
        article.setTitle(title);
        article.setContent(content);
        article.setSource(source);
        article.setAuthor(author);
        article.setPubdate(parsePublishTime(create));
        article.setUrl(url);
        return article;
    }

    /** @return the publication time found in {@code raw}, or {@code null} when there is none */
    private static java.util.Date parsePublishTime(String raw) {
        if (raw == null) {
            return null;
        }
        Matcher matcher = PUBLISH_TIME.matcher(raw);
        // Without a match the whole text is handed to the parser, as before.
        String candidate = matcher.find() ? matcher.group(0) : raw;
        try {
            return new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(candidate);
        } catch (java.text.ParseException ignored) {
            // An article without a recognisable date is still worth keeping.
            return null;
        }
    }

    /**
     * Sends the article to the {@code news} index as a document of type {@code article}.
     * Indexing is best effort: a failure is reported on standard error and does not stop the crawl.
     *
     * @param article the article to index
     */
    public static void index(Article article) {
        // NOTE(review): the mapper uses its default date serialisation here, while the search
        // side reads dates as "yyyy-MM-dd'T'HH:mm:ss" - confirm the two agree.
        try {
            TransportClient.createIndex("news", "article", MAPPER.writeValueAsString(article));
        } catch (Exception e) {
            System.err.println("Failed to index article: " + e);
        }
    }

    /**
     * Strips markup from extracted text.
     *
     * @param str the text to clean, possibly {@code null}
     * @return the text without tags and without the characters removed by the second
     *         replacement; the empty string when {@code str} is {@code null}
     */
    public static String replaceHTML(String str) {
        // NOTE(review): the second pattern reads as a single space in this copy of the file;
        // it may originally have been an HTML entity lost in extraction - confirm.
        return str != null ? str.replaceAll("\\<.*?>", "").replaceAll(" ", "") : "";
    }
}
61 | -------------------------------------------------------------------------------- /src/main/resources/applicationContext.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/resources/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: elasticsearch -------------------------------------------------------------------------------- /src/main/resources/jdbc.properties: -------------------------------------------------------------------------------- 1 |
jdbc.driverClassName=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/news 3 | jdbc.username=root 4 | jdbc.password=root -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=ERROR, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %c{1}:%L - %m%n 9 | 10 | # NodeChecker gets too chatty during test phase 11 | # log4j.category.io.searchbox.client.config.discovery.NodeChecker=FATAL 12 | 13 | # Same as above, gets too chatty for regular builds 14 | log4j.category.org.elasticsearch=WARN -------------------------------------------------------------------------------- /src/main/resources/sql.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `news` DEFAULT CHARACTER SET utf8; 2 | 3 | USE `news`; 4 | 5 | DROP TABLE IF EXISTS `news`; 6 | 7 | CREATE TABLE `news` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `title` varchar(100) DEFAULT NULL, 10 | `content` mediumtext, 11 | `url` varchar(100) DEFAULT NULL, 12 | `source` varchar(50) DEFAULT NULL, 13 | `author` varchar(50) DEFAULT NULL, 14 | `pubdate` datetime DEFAULT NULL, 15 | PRIMARY KEY (`id`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=112 DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/mvc-dispatcher-servlet.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
-------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/pages/search.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="utf-8" %> 2 | <%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt" %> 3 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> 4 | 5 | 6 | 7 | 8 | elasticsearch-jest 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 53 | 54 | 55 | 59 | 60 | 62 | 63 | 64 | 66 | 67 | 68 | 70 | 71 | 72 | 74 | 75 | 76 | 78 | 79 | 80 | 82 | 83 | 84 | 93 | 95 | 106 | 117 | 121 | 125 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | selected>全部 148 | selected>标题 149 | selected>内容 150 | selected>来源 151 | selected>作者 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | selected>升序 160 | selected>降序 161 | 162 | 163 | 164 | 165 | selected>10 166 | selected>20 167 | selected>50 168 | selected>100 169 | 170 | 171 | 172 | 搜索 173 | 174 | 175 | 176 | 177 | 178 | 搜索关键字${queryString}共检索到${count}条记录,共${totalPages}页,每页显示${pageSize}条。共耗时${took}毫秒。 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 没有要查询的内容 187 | 188 | 189 | 190 | ${article.title} 191 | ${article.content} 192 | 来源:${article.source} 编辑:${article.author} 发表日期: 193 | 194 | 195 | 196 | 197 | 198 | 199 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | elasticsearch-jest-example 7 | 8 | contextConfigLocation 9 | 10 | /WEB-INF/mvc-dispatcher-servlet.xml 11 | 12 | 13 | 14 | 15 | org.springframework.web.context.ContextLoaderListener 16 | 17 | 18 | CharacterEncodingFilter 19 | org.springframework.web.filter.CharacterEncodingFilter 20 | 21 | 
encoding 22 | UTF-8 23 | 24 | 25 | 26 | CharacterEncodingFilter 27 | /* 28 | 29 | 30 | 31 | mvc-dispatcher 32 | org.springframework.web.servlet.DispatcherServlet 33 | 34 | contextConfigLocation 35 | /WEB-INF/mvc-dispatcher-servlet.xml 36 | 37 | 1 38 | 39 | 40 | 41 | mvc-dispatcher 42 | / 43 | 44 | -------------------------------------------------------------------------------- /src/main/webapp/css/pager.css: -------------------------------------------------------------------------------- 1 | #pageNav{padding-left:386px;} 2 | #pageNav ul.pages { 3 | display:block; 4 | border:none; 5 | text-transform:uppercase; 6 | font-size:12px; 7 | margin:10px 0 50px; 8 | padding:0; 9 | } 10 | #pageNav ul.pages li { 11 | list-style:none; 12 | float:left; 13 | border:1px solid #9AAFE5; 14 | text-decoration:none; 15 | margin:0 5px 0 0; 16 | padding:5px; 17 | } 18 | #pageNav ul.pages li:hover { 19 | border:1px solid #003f7e; 20 | } 21 | #pageNav ul.pages li.pgEmpty { 22 | 23 | } 24 | #pageNav ul.pages li.pgCurrent { 25 | border:none; 26 | /*border:1px solid #003f7e;*/ 27 | color:#000; 28 | font-weight:700; 29 | /*background-color:#eee;*/ 30 | background: none repeat scroll 0 0 #2E6AB1; 31 | border: 1px solid #2E6AB1; 32 | color: #FFFFFF; 33 | font-weight: bold; 34 | } -------------------------------------------------------------------------------- /src/main/webapp/images/top.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v5tech/elasticsearch-jest-example/041008b96f219e5b9bad4d8a429faa44aa862cab/src/main/webapp/images/top.png -------------------------------------------------------------------------------- /src/main/webapp/js/jquery.pager.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery pager plugin 3 | * Version 1.0 (12/22/2008) 4 | * @requires jQuery v1.2.6 or later 5 | * 6 | * Example at: http://jonpauldavies.github.com/JQuery/Pager/PagerDemo.html 7 | * 8 
| Copyright (c) 2008-2009 Jon Paul Davies 9 | * Dual licensed under the MIT and GPL licenses: 10 | * http://www.opensource.org/licenses/mit-license.php 11 | * http://www.gnu.org/licenses/gpl.html 12 | * 13 | * Read the related blog post and contact the author at http://www.j-dee.com/2008/12/22/jquery-pager-plugin/ 14 | * 15 | * This version is far from perfect and doesn't manage its own state, therefore contributions are more than welcome! 16 | * 17 | * Usage: .pager({ pagenumber: 1, pagecount: 15, buttonClickCallback: PagerClickTest }); 18 | * 19 | * Where pagenumber is the visible page number 20 | * pagecount is the total number of pages to display 21 | * buttonClickCallback is the method to fire when a pager button is clicked. 22 | * 23 | * buttonClickCallback signature is PagerClickTest = function(pageclickednumber) 24 | * Where pageclickednumber is the number of the page clicked in the control. 25 | * 26 | * The included Pager.CSS file is a dependency but can obviously be tweaked to your wishes 27 | * Tested in IE6 IE7 Firefox & Safari. Any browser strangeness, please report.
/*
 * jQuery pager plugin (jquery.pager.js), followed by the goTop scroll helper (top.js).
 */
(function($) {

    // Number of page links shown on each side of the current page.
    var PAGES_EACH_SIDE = 4;

    /**
     * Renders a pager (first / previous / numbered pages / next / last) into every matched element.
     *
     * @param {Object} [options]
     * @param {number|string} [options.pagenumber=1] page currently displayed
     * @param {number|string} [options.pagecount=1] total number of pages
     * @param {function} [options.buttonClickCallback] called with the destination page when a
     *        button is clicked; numbered buttons pass the page as text, the four special
     *        buttons pass a number
     * @returns {jQuery} the matched set, for chaining
     */
    $.fn.pager = function(options) {

        // Read everything from the merged object so the defaults actually apply.
        var opts = $.extend({}, $.fn.pager.defaults, options);
        var pagenumber = parseInt(opts.pagenumber, 10);
        var pagecount = parseInt(opts.pagecount, 10);

        return this.each(function() {

            // empty out the destination element and then render out the pager
            var $container = $(this);
            $container.empty().append(renderpager(pagenumber, pagecount, opts.buttonClickCallback));

            // pointer cursor while hovering a button; only this pager's buttons are bound
            $container.find('.pages li')
                .mouseover(function() { document.body.style.cursor = "pointer"; })
                .mouseout(function() { document.body.style.cursor = "auto"; });
        });
    };

    // Builds and returns the complete pager list for the given state.
    function renderpager(pagenumber, pagecount, buttonClickCallback) {

        // The stylesheet and the hover selector both target "ul.pages li".
        var $pager = $('<ul class="pages"></ul>');

        // first / previous
        $pager.append(renderButton('首页', pagenumber, pagecount, buttonClickCallback))
              .append(renderButton('上一页', pagenumber, pagecount, buttonClickCallback));

        // Visible window: the current page plus PAGES_EACH_SIDE on either side,
        // shifted so it never runs past the first or last page.
        var startPoint = 1;
        var endPoint = 2 * PAGES_EACH_SIDE + 1;

        if (pagenumber > PAGES_EACH_SIDE) {
            startPoint = pagenumber - PAGES_EACH_SIDE;
            endPoint = pagenumber + PAGES_EACH_SIDE;
        }

        if (endPoint > pagecount) {
            startPoint = pagecount - 2 * PAGES_EACH_SIDE;
            endPoint = pagecount;
        }

        if (startPoint < 1) {
            startPoint = 1;
        }

        // one button per visible page; the current page is styled instead of clickable
        for (var page = startPoint; page <= endPoint; page++) {

            // NOTE(review): the "page-number" class is assumed; this project's CSS only
            // relies on the li element itself plus pgEmpty / pgCurrent.
            var $pageButton = $('<li class="page-number">' + page + '</li>');

            if (page == pagenumber) {
                $pageButton.addClass('pgCurrent');
            } else {
                // the handler reads the page number back from the button's text node
                $pageButton.click(function() { buttonClickCallback(this.firstChild.data); });
            }
            $pageButton.appendTo($pager);
        }

        // next / last
        $pager.append(renderButton('下一页', pagenumber, pagecount, buttonClickCallback))
              .append(renderButton('末页', pagenumber, pagecount, buttonClickCallback));

        return $pager;
    }

    // Builds one of the four special buttons (first, previous, next, last), selected by its label.
    function renderButton(buttonLabel, pagenumber, pagecount, buttonClickCallback) {

        // NOTE(review): the "pgNext" class is assumed, as for "page-number" above.
        var $Button = $('<li class="pgNext">' + buttonLabel + '</li>');

        var destPage = 1;

        // work out destination page for required button type
        switch (buttonLabel) {
            case "首页":
                destPage = 1;
                break;
            case "上一页":
                destPage = pagenumber - 1;
                break;
            case "下一页":
                destPage = pagenumber + 1;
                break;
            case "末页":
                destPage = pagecount;
                break;
        }

        // Backward buttons are greyed out on the first page, forward buttons on the last.
        var goesBack = (buttonLabel == "首页" || buttonLabel == "上一页");
        var disabled = goesBack ? pagenumber <= 1 : pagenumber >= pagecount;

        if (disabled) {
            $Button.addClass('pgEmpty');
        } else {
            $Button.click(function() { buttonClickCallback(destPage); });
        }

        return $Button;
    }

    // pager defaults; the no-op callback keeps clicks harmless when none is supplied
    $.fn.pager.defaults = {
        pagenumber: 1,
        pagecount: 1,
        buttonClickCallback: function() {}
    };

})(jQuery);

// ---- src/main/webapp/js/top.js ----

/**
 * Scrolls the window back towards the top-left corner, dividing the remaining
 * offset by (1 + acceleration) on every tick until both offsets reach zero.
 *
 * @param {number} [acceleration=0.1] larger values scroll faster
 * @param {number} [time=16] delay between ticks, in milliseconds
 */
function goTop(acceleration, time)
{
    acceleration = acceleration || 0.1;
    time = time || 16;

    // The scroll offset is read from three places and the largest value is used.
    var root = document.documentElement;
    var body = document.body;

    var x = Math.max(root ? root.scrollLeft || 0 : 0,
                     body ? body.scrollLeft || 0 : 0,
                     window.scrollX || 0);
    var y = Math.max(root ? root.scrollTop || 0 : 0,
                     body ? body.scrollTop || 0 : 0,
                     window.scrollY || 0);

    var speed = 1 + acceleration;
    window.scrollTo(Math.floor(x / speed), Math.floor(y / speed));

    if (x > 0 || y > 0)
    {
        // schedule the next tick with a function instead of an evaluated string
        window.setTimeout(function() { goTop(acceleration, time); }, time);
    }
}
) maps.get("articles")); 49 | long count = (Long) maps.get("count"); 50 | modelAndView.addObject("count", count); 51 | modelAndView.addObject("took", (Long) maps.get("took")); 52 | modelAndView.addObject("field", field); 53 | modelAndView.addObject("older", older); 54 | modelAndView.addObject("pageNumber", pageNumber); 55 | modelAndView.addObject("pageSize", pageSize); 56 | modelAndView.addObject("totalPages", count % Integer.parseInt(pageSize) == 0 ? count / Integer.parseInt(pageSize) : count / Integer.parseInt(pageSize) + 1); 57 | } catch (Exception e) { 58 | e.printStackTrace(); 59 | } 60 | return modelAndView; 61 | } 62 | 63 | } -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/dao/ArticleDao.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.dao; 2 | 3 | import net.aimeizi.model.Article; 4 | 5 | /** 6 | * Created by Administrator on 2015/9/10. 7 | */ 8 | public interface ArticleDao { 9 | void save(Article article); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/dao/impl/ArticleDaoImpl.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.dao.impl; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.jdbc.core.JdbcTemplate; 7 | import org.springframework.jdbc.core.PreparedStatementSetter; 8 | import org.springframework.stereotype.Repository; 9 | 10 | import javax.sql.DataSource; 11 | import java.sql.Date; 12 | import java.sql.PreparedStatement; 13 | import java.sql.SQLException; 14 | 15 | /** 16 | * Created by Administrator on 2015/9/10. 
17 | */ 18 | @Repository 19 | public class ArticleDaoImpl implements ArticleDao { 20 | 21 | private JdbcTemplate jdbcTemplate; 22 | 23 | @Autowired 24 | public void setDataSource(DataSource dataSource) { 25 | this.jdbcTemplate = new JdbcTemplate(dataSource); 26 | } 27 | 28 | public void save(final Article article) { 29 | String sql = "INSERT INTO news (title,content,url,source,author,pubdate) VALUES (?,?,?,?,?,?)"; 30 | jdbcTemplate.update(sql, new PreparedStatementSetter() { 31 | public void setValues(PreparedStatement ps) throws SQLException { 32 | ps.setString(1,article.getTitle()); 33 | ps.setString(2,article.getContent()); 34 | ps.setString(3,article.getUrl()); 35 | ps.setString(4,article.getSource()); 36 | ps.setString(5,article.getAuthor()); 37 | ps.setDate(6, new Date(article.getPubdate().getTime())); 38 | } 39 | }); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/model/Article.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.model; 2 | 3 | import io.searchbox.annotations.JestId; 4 | 5 | import java.util.Date; 6 | 7 | public class Article { 8 | 9 | @JestId 10 | private int id; 11 | private String title; 12 | private String content; 13 | private String url; 14 | private Date pubdate; 15 | private String source; 16 | private String author; 17 | 18 | public Article() { 19 | } 20 | 21 | public Article(int id, String title, String content, String url, 22 | Date pubdate, String source, String author) { 23 | super(); 24 | this.id = id; 25 | this.title = title; 26 | this.content = content; 27 | this.url = url; 28 | this.pubdate = pubdate; 29 | this.source = source; 30 | this.author = author; 31 | } 32 | 33 | public int getId() { 34 | return id; 35 | } 36 | 37 | public void setId(int id) { 38 | this.id = id; 39 | } 40 | 41 | public String getTitle() { 42 | return title; 43 | } 44 | 45 | public void setTitle(String title) { 46 | 
this.title = title; 47 | } 48 | 49 | public String getContent() { 50 | return content; 51 | } 52 | 53 | public void setContent(String content) { 54 | this.content = content; 55 | } 56 | 57 | public String getUrl() { 58 | return url; 59 | } 60 | 61 | public void setUrl(String url) { 62 | this.url = url; 63 | } 64 | 65 | public Date getPubdate() { 66 | return pubdate; 67 | } 68 | 69 | public void setPubdate(Date pubdate) { 70 | this.pubdate = pubdate; 71 | } 72 | 73 | public String getSource() { 74 | return source; 75 | } 76 | 77 | public void setSource(String source) { 78 | this.source = source; 79 | } 80 | 81 | public String getAuthor() { 82 | return author; 83 | } 84 | 85 | public void setAuthor(String author) { 86 | this.author = author; 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/service/ArticleService.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.service; 2 | 3 | import net.aimeizi.model.Article; 4 | 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | /** 9 | * Created by Administrator on 2015/9/21. 
/**
 * Search operations over the indexed articles.
 */
public interface ArticleService {

    /**
     * Runs a full-text search driven by {@code field} and {@code queryString}.
     *
     * @param field       field selector for the search
     * @param queryString text to search for
     * @param older       requested sort direction
     * @param pageNumber  page to return
     * @param pageSize    number of results per page
     * @return a map carrying the search outcome
     * @throws Exception if the search cannot be carried out
     */
    Map search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception;

}
27 | */ 28 | @Service 29 | public class ArticleServiceImpl implements ArticleService { 30 | 31 | 32 | private static JestClient getJestClient() { 33 | JestClientFactory factory = new JestClientFactory(); 34 | factory.setHttpClientConfig(new HttpClientConfig 35 | .Builder("http://127.0.0.1:9200") 36 | .gson(new GsonBuilder().setDateFormat("yyyy-MM-dd'T'HH:mm:ss").create()) 37 | .multiThreaded(true) 38 | .readTimeout(10000) 39 | .build()); 40 | JestClient client = factory.getObject(); 41 | return client; 42 | } 43 | 44 | /** 45 | * 检索 46 | * 47 | * @param field 48 | * @param queryString 49 | * @param older 50 | * @param pageNumber 51 | * @param pageSize 52 | * @return 53 | * @throws Exception 54 | */ 55 | public Map search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception { 56 | List articles = new ArrayList(); 57 | JestClient jestClient = getJestClient(); 58 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); 59 | // 构建查询 60 | if ("all".equals(field)) { 61 | searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString)); 62 | } else { 63 | searchSourceBuilder.query(QueryBuilders.termQuery(field, queryString)); 64 | // 设置排序 65 | // searchSourceBuilder.sort(field, "asc".equals(older) ? 
SortOrder.ASC : SortOrder.DESC); // 设置排序字段及排序顺序 66 | } 67 | // 设置高亮字段 68 | HighlightBuilder highlightBuilder = new HighlightBuilder(); 69 | highlightBuilder.field("title");//高亮title 70 | highlightBuilder.field("content");//高亮content 71 | highlightBuilder.field("author");//高亮author 72 | highlightBuilder.field("source");//高亮source 73 | highlightBuilder.preTags("").postTags("");//高亮标签 74 | highlightBuilder.fragmentSize(200);//高亮内容长度 75 | searchSourceBuilder.highlight(highlightBuilder); 76 | 77 | // 设置分页 78 | searchSourceBuilder.from((pageNumber - 1) * pageSize);//设置起始页 79 | searchSourceBuilder.size(pageSize);//设置页大小 80 | Search search = new Search.Builder(searchSourceBuilder.toString()) 81 | .addIndex("news")// 索引名称 82 | .build(); 83 | SearchResult result = jestClient.execute(search); 84 | 85 | // 手动解析 86 | // parseSearchResult(articles, result); 87 | 88 | // 自动解析 89 | JsonObject jsonObject = result.getJsonObject(); 90 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 91 | long took = jsonObject.get("took").getAsLong(); 92 | long total = hitsobject.get("total").getAsLong(); 93 | List> hits = result.getHits(Article.class); 94 | for (SearchResult.Hit hit : hits) { 95 | Article source = hit.source; 96 | //获取高亮后的内容 97 | Map> highlight = hit.highlight; 98 | List titlelist = highlight.get("title");//高亮后的title 99 | if (titlelist != null) { 100 | source.setTitle(titlelist.get(0)); 101 | } 102 | List contentlist = highlight.get("content");//高亮后的content 103 | if (contentlist != null) { 104 | source.setContent(contentlist.get(0)); 105 | } 106 | List authorlist = highlight.get("author");//高亮后的author 107 | if (authorlist != null) { 108 | source.setAuthor(authorlist.get(0)); 109 | } 110 | List sourcelist = highlight.get("source");//高亮后的source 111 | if (sourcelist != null) { 112 | source.setSource(sourcelist.get(0)); 113 | } 114 | Article article = new Article(); 115 | article.setId(source.getId()); 116 | article.setTitle(source.getTitle()); 117 | 
article.setContent(source.getContent()); 118 | article.setSource(source.getSource()); 119 | article.setAuthor(source.getAuthor()); 120 | article.setUrl(source.getUrl()); 121 | article.setPubdate(source.getPubdate()); 122 | articles.add(article); 123 | } 124 | Map maps = new HashMap(); 125 | maps.put("articles", articles); 126 | maps.put("count", total); 127 | maps.put("took", took); 128 | return maps; 129 | } 130 | 131 | /** 132 | * 手动解析查询结果 133 | * @param articles 134 | * @param result 135 | * @throws ParseException 136 | */ 137 | private void parseSearchResult(List articles, SearchResult result) throws ParseException { 138 | JsonObject jsonObject = result.getJsonObject(); 139 | long took = jsonObject.get("took").getAsLong(); 140 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 141 | long total = hitsobject.get("total").getAsLong(); 142 | JsonArray jsonArray = hitsobject.getAsJsonArray("hits"); 143 | 144 | for (int i = 0; i < jsonArray.size(); i++) { 145 | JsonObject jsonHitsObject = jsonArray.get(i).getAsJsonObject(); 146 | 147 | // 获取返回字段 148 | JsonObject sourceObject = jsonHitsObject.get("_source").getAsJsonObject(); 149 | 150 | // 封装Article对象 151 | Article article = new Article(); 152 | article.setTitle(sourceObject.get("title").getAsString()); 153 | article.setContent(sourceObject.get("content").getAsString()); 154 | article.setSource(sourceObject.get("source").getAsString()); 155 | article.setAuthor(sourceObject.get("author").getAsString()); 156 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); 157 | article.setPubdate(sdf.parse(sourceObject.get("pubdate").getAsString())); 158 | article.setUrl(sourceObject.get("url").getAsString()); 159 | 160 | // 获取高亮字段 161 | JsonObject highlightObject = jsonHitsObject.get("highlight").getAsJsonObject(); 162 | String content = null; 163 | String title = null; 164 | String source = null; 165 | String author = null; 166 | if (highlightObject.get("content") != null) { 167 | content = 
highlightObject.get("content").getAsJsonArray().get(0).getAsString(); 168 | } 169 | if (highlightObject.get("title") != null) { 170 | title = highlightObject.get("title").getAsJsonArray().get(0).getAsString(); 171 | } 172 | if (highlightObject.get("source") != null) { 173 | source = highlightObject.get("source").getAsJsonArray().get(0).getAsString(); 174 | } 175 | if (highlightObject.get("author") != null) { 176 | author = highlightObject.get("author").getAsJsonArray().get(0).getAsString(); 177 | } 178 | if (content != null) { 179 | article.setContent(content); 180 | } 181 | if (title != null) { 182 | article.setTitle(title); 183 | } 184 | if (source != null) { 185 | article.setSource(source); 186 | } 187 | if (author != null) { 188 | article.setAuthor(author); 189 | } 190 | articles.add(article); 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/GovNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * 爬取中央政府网要闻、热点、部门新闻、地方报道、执法监管等新闻信息 13 | * Created by Administrator on 2015/9/10. 
14 | */ 15 | public class GovNewsPageProcesser implements PageProcessor { 16 | 17 | // 新闻列表 18 | private final static String URL_LIST = "http://new\\.sousuo\\.gov\\.cn\\/column\\/\\d+\\/\\d+\\.htm"; 19 | 20 | // 新闻列表页正文url 21 | private final static String URL_POST = "http://www\\.gov\\.cn/[\\w/-]+\\.htm"; 22 | 23 | private Site site = Site.me() 24 | .setRetryTimes(3) 25 | .setSleepTime(1000) 26 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"); 27 | 28 | public void process(Page page) { 29 | // 列表页 30 | if(page.getUrl().regex(URL_LIST).match()){ 31 | // 添加详情页请求链接 32 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 33 | // 添加列表页请求链接 34 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 35 | }else{// 详情页 36 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//div[@class='pages-title']").toString())); 37 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/table[@id='printContent']/tbody/tr/td").toString())); 38 | page.putField("source",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']/span[@class='font'][2]").toString().replace("来源: ", ""))); 39 | page.putField("author",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/div[@class='editor']").toString().replace("责任编辑: ", ""))); 40 | page.putField("create",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']").toString())); 41 | page.putField("url",page.getUrl().get()); 42 | 43 | String title = (String)page.getResultItems().get("title"); 44 | String content = (String)page.getResultItems().get("content"); 45 | String create = (String)page.getResultItems().get("create"); 46 | String source = (String)page.getResultItems().get("source"); 47 | String url = 
(String)page.getResultItems().get("url"); 48 | String author = (String)page.getResultItems().get("author"); 49 | 50 | // 创建article 51 | Article article = Utils.createArticle(title, content, source, author, url, create); 52 | 53 | // 索引 54 | 55 | Utils.index(article); 56 | 57 | } 58 | } 59 | 60 | public Site getSite() { 61 | return site; 62 | } 63 | 64 | /** 65 | * html字符过滤 66 | * @param str 67 | * @return 68 | */ 69 | private static String replaceHTML(String str){ 70 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 71 | } 72 | 73 | public static void main(String[] args) { 74 | 75 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 76 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 77 | Spider.create(new GovNewsPageProcesser()) 78 | .addUrl("http://new.sousuo.gov.cn/column/19769/0.htm") //要闻 79 | .addUrl("http://new.sousuo.gov.cn/column/16704/0.htm") //热点 80 | .addUrl("http://new.sousuo.gov.cn/column/16700/0.htm") //部门新闻 81 | .addUrl("http://new.sousuo.gov.cn/column/16699/0.htm") //地方报道 82 | .addUrl("http://new.sousuo.gov.cn/column/16697/0.htm") //执法监管 83 | .addUrl("http://new.sousuo.gov.cn/column/19423/0.htm") //国务院信息 84 | .addUrl("http://new.sousuo.gov.cn/column/16622/0.htm") //讲话 85 | .addUrl("http://new.sousuo.gov.cn/column/16623/0.htm") //会议 86 | .addUrl("http://new.sousuo.gov.cn/column/16621/0.htm") //活动 87 | .addUrl("http://new.sousuo.gov.cn/column/16620/0.htm") //出访 88 | .addUrl("http://new.sousuo.gov.cn/column/16740/0.htm") //专题信息-最新 89 | .addUrl("http://new.sousuo.gov.cn/column/16739/0.htm") //专题信息-聚焦 90 | .addUrl("http://new.sousuo.gov.cn/column/16743/0.htm") //事件 91 | .addUrl("http://new.sousuo.gov.cn/column/16744/0.htm") //预案 92 | .addUrl("http://new.sousuo.gov.cn/column/16742/0.htm") //工作 93 | .addUrl("http://new.sousuo.gov.cn/column/16765/0.htm") //政策法规解读-专家 94 | .addUrl("http://new.sousuo.gov.cn/column/16764/0.htm") //政策法规解读-媒体 95 | 
.addUrl("http://new.sousuo.gov.cn/column/17999/0.htm") //评论-要论 96 | .addUrl("http://new.sousuo.gov.cn/column/18000/0.htm") //评论-时评 97 | .addUrl("http://new.sousuo.gov.cn/column/18001/0.htm") //评论-网评 98 | .addUrl("http://new.sousuo.gov.cn/column/16852/0.htm") //数据要闻 99 | .addPipeline(jdbcPipeline) // 将抓取到的结果保存到数据库 100 | .thread(5) 101 | .run(); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/JdbcPipeline.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import java.text.ParseException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | /** 19 | * Created by Administrator on 2015/9/10. 
20 | */ 21 | @Component("jdbcPipeline") 22 | public class JdbcPipeline implements Pipeline { 23 | 24 | @Autowired 25 | ArticleDao articleDao; 26 | 27 | public void process(ResultItems resultItems, Task task) { 28 | Map items = resultItems.getAll(); 29 | if(resultItems!=null&&resultItems.getAll().size()>0){ 30 | Article article = new Article(); 31 | article.setTitle((String) items.get("title")); 32 | article.setContent((String) items.get("content")); 33 | article.setSource((String) items.get("source")); 34 | article.setAuthor((String) items.get("author")); 35 | article.setUrl((String)items.get("url")); 36 | String dataStr = (String)items.get("create"); 37 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 38 | Matcher matcher = pattern.matcher(dataStr); 39 | if(matcher.find()){ 40 | dataStr = matcher.group(0); 41 | } 42 | try { 43 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(dataStr)); 44 | } catch (ParseException e) { 45 | e.printStackTrace(); 46 | } 47 | articleDao.save(article); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/NeteaseNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/9 0009. 
13 | */ 14 | public class NeteaseNewsPageProcesser implements PageProcessor { 15 | 16 | private Site site = Site.me().setDomain("news.163.com") 17 | .setRetryTimes(3) 18 | .setSleepTime(1000) 19 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36"); 20 | 21 | public static final String URL_LIST = "http://news\\.163\\.com/special/\\w+/\\w+\\.html"; 22 | 23 | public static final String URL_POST = "http://news\\.163\\.com/.+\\.html"; 24 | 25 | public void process(Page page) { 26 | //列表页 27 | if (page.getUrl().regex(URL_LIST).match()||page.getUrl().regex("http://news\\.163\\.com/domestic").match()||page.getUrl().regex("http://news\\.163\\.com/shehui").match()) { 28 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 29 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 30 | }else{ 31 | 32 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//h1[@id='h1title']").toString())); 33 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@id='endText']").toString())); 34 | page.putField("create", Utils.replaceHTML(page.getHtml().xpath("//div[@class=\"ep-time-soure cDGray\"]").toString())); 35 | page.putField("source", Utils.replaceHTML(page.getHtml().xpath("//a[@id=\"ne_article_source\"]/text()").toString())); 36 | page.putField("url", page.getUrl().get()); 37 | 38 | String title = (String)page.getResultItems().get("title"); 39 | String content = (String)page.getResultItems().get("content"); 40 | String create = (String)page.getResultItems().get("create"); 41 | String source = (String)page.getResultItems().get("source"); 42 | String url = (String)page.getResultItems().get("url"); 43 | String author = ""; 44 | 45 | // 创建article 46 | Article article = Utils.createArticle(title, content, source, author, url, create); 47 | 48 | // 索引 49 | 50 | Utils.index(article); 51 | 52 | } 53 | } 54 | 55 | public Site getSite() { 56 | return 
site; 57 | } 58 | 59 | 60 | 61 | public static void main(String[] args) { 62 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 63 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 64 | Spider.create(new NeteaseNewsPageProcesser()) 65 | .addUrl("http://news.163.com/domestic") 66 | .addUrl("http://news.163.com/shehui") 67 | .addPipeline(jdbcPipeline) 68 | .thread(5) 69 | .run(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/Utils.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import net.aimeizi.client.elasticsearch.TransportClient; 5 | import net.aimeizi.model.Article; 6 | 7 | import java.text.SimpleDateFormat; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/11. 
13 | */ 14 | public class Utils { 15 | 16 | /** 17 | * 创建Article 18 | * @return 19 | */ 20 | public static Article createArticle(String title,String content,String source,String author,String url,String create){ 21 | Article article = new Article(); 22 | article.setTitle(title); 23 | article.setContent(content); 24 | article.setSource(source); 25 | article.setAuthor(author); 26 | try { 27 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 28 | Matcher matcher = pattern.matcher(create); 29 | if(matcher.find()){ 30 | create = matcher.group(0); 31 | } 32 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(create)); 33 | }catch (Exception e){ 34 | } 35 | article.setUrl(url); 36 | return article; 37 | } 38 | 39 | /** 40 | * 创建索引 41 | * @param article 42 | */ 43 | public static void index(Article article){ 44 | // 创建索引 45 | ObjectMapper mapper = new ObjectMapper(); 46 | try { 47 | TransportClient.createIndex("news", "article", mapper.writeValueAsString(article)); 48 | }catch (Exception e){ 49 | } 50 | } 51 | 52 | /** 53 | * html字符过滤 54 | * @param str 55 | * @return 56 | */ 57 | public static String replaceHTML(String str){ 58 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/resources/applicationContext.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/resources/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: elasticsearch -------------------------------------------------------------------------------- /src/main/resources/jdbc.properties: -------------------------------------------------------------------------------- 1 | 
jdbc.driverClassName=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/news 3 | jdbc.username=root 4 | jdbc.password=root -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=ERROR, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %c{1}:%L - %m%n 9 | 10 | # NodeChecker gets too chatty during test phase 11 | # log4j.category.io.searchbox.client.config.discovery.NodeChecker=FATAL 12 | 13 | # Same as above, gets too chatty for regular builds 14 | log4j.category.org.elasticsearch=WARN -------------------------------------------------------------------------------- /src/main/resources/sql.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `news` DEFAULT CHARACTER SET utf8; 2 | 3 | USE `news`; 4 | 5 | DROP TABLE IF EXISTS `news`; 6 | 7 | CREATE TABLE `news` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `title` varchar(100) DEFAULT NULL, 10 | `content` mediumtext, 11 | `url` varchar(100) DEFAULT NULL, 12 | `source` varchar(50) DEFAULT NULL, 13 | `author` varchar(50) DEFAULT NULL, 14 | `pubdate` datetime DEFAULT NULL, 15 | PRIMARY KEY (`id`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=112 DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/mvc-dispatcher-servlet.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
-------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/pages/search.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="utf-8" %> 2 | <%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt" %> 3 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> 4 | 5 | 6 | 7 | 8 | elasticsearch-jest 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 53 | 54 | 55 | 59 | 60 | 62 | 63 | 64 | 66 | 67 | 68 | 70 | 71 | 72 | 74 | 75 | 76 | 78 | 79 | 80 | 82 | 83 | 84 | 93 | 95 | 106 | 117 | 121 | 125 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | selected>全部 148 | selected>标题 149 | selected>内容 150 | selected>来源 151 | selected>作者 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | selected>升序 160 | selected>降序 161 | 162 | 163 | 164 | 165 | selected>10 166 | selected>20 167 | selected>50 168 | selected>100 169 | 170 | 171 | 172 | 搜索 173 | 174 | 175 | 176 | 177 | 178 | 搜索关键字${queryString}共检索到${count}条记录,共${totalPages}页,每页显示${pageSize}条。共耗时${took}毫秒。 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 没有要查询的内容 187 | 188 | 189 | 190 | ${article.title} 191 | ${article.content} 192 | 来源:${article.source} 编辑:${article.author} 发表日期: 193 | 194 | 195 | 196 | 197 | 198 | 199 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | elasticsearch-jest-example 7 | 8 | contextConfigLocation 9 | 10 | /WEB-INF/mvc-dispatcher-servlet.xml 11 | 12 | 13 | 14 | 15 | org.springframework.web.context.ContextLoaderListener 16 | 17 | 18 | CharacterEncodingFilter 19 | org.springframework.web.filter.CharacterEncodingFilter 20 | 21 | 
encoding 22 | UTF-8 23 | 24 | 25 | 26 | CharacterEncodingFilter 27 | /* 28 | 29 | 30 | 31 | mvc-dispatcher 32 | org.springframework.web.servlet.DispatcherServlet 33 | 34 | contextConfigLocation 35 | /WEB-INF/mvc-dispatcher-servlet.xml 36 | 37 | 1 38 | 39 | 40 | 41 | mvc-dispatcher 42 | / 43 | 44 | -------------------------------------------------------------------------------- /src/main/webapp/css/pager.css: -------------------------------------------------------------------------------- 1 | #pageNav{padding-left:386px;} 2 | #pageNav ul.pages { 3 | display:block; 4 | border:none; 5 | text-transform:uppercase; 6 | font-size:12px; 7 | margin:10px 0 50px; 8 | padding:0; 9 | } 10 | #pageNav ul.pages li { 11 | list-style:none; 12 | float:left; 13 | border:1px solid #9AAFE5; 14 | text-decoration:none; 15 | margin:0 5px 0 0; 16 | padding:5px; 17 | } 18 | #pageNav ul.pages li:hover { 19 | border:1px solid #003f7e; 20 | } 21 | #pageNav ul.pages li.pgEmpty { 22 | 23 | } 24 | #pageNav ul.pages li.pgCurrent { 25 | border:none; 26 | /*border:1px solid #003f7e;*/ 27 | color:#000; 28 | font-weight:700; 29 | /*background-color:#eee;*/ 30 | background: none repeat scroll 0 0 #2E6AB1; 31 | border: 1px solid #2E6AB1; 32 | color: #FFFFFF; 33 | font-weight: bold; 34 | } -------------------------------------------------------------------------------- /src/main/webapp/images/top.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v5tech/elasticsearch-jest-example/041008b96f219e5b9bad4d8a429faa44aa862cab/src/main/webapp/images/top.png -------------------------------------------------------------------------------- /src/main/webapp/js/jquery.pager.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery pager plugin 3 | * Version 1.0 (12/22/2008) 4 | * @requires jQuery v1.2.6 or later 5 | * 6 | * Example at: http://jonpauldavies.github.com/JQuery/Pager/PagerDemo.html 7 | * 8 
| * Copyright (c) 2008-2009 Jon Paul Davies 9 | * Dual licensed under the MIT and GPL licenses: 10 | * http://www.opensource.org/licenses/mit-license.php 11 | * http://www.gnu.org/licenses/gpl.html 12 | * 13 | * Read the related blog post and contact the author at http://www.j-dee.com/2008/12/22/jquery-pager-plugin/ 14 | * 15 | * This version is far from perfect and doesn't manage its own state, therefore contributions are more than welcome! 16 | * 17 | * Usage: .pager({ pagenumber: 1, pagecount: 15, buttonClickCallback: PagerClickTest }); 18 | * 19 | * Where pagenumber is the visible page number 20 | * pagecount is the total number of pages to display 21 | * buttonClickCallback is the method to fire when a pager button is clicked. 22 | * 23 | * buttonClickCallback signature is PagerClickTest = function(pageclickednumber) 24 | * Where pageclickednumber is the number of the page clicked in the control. 25 | * 26 | * The included Pager.CSS file is a dependency but can obviously be tweaked to your wishes 27 | * Tested in IE6 IE7 Firefox & Safari. Any browser strangeness, please report. 
28 | */ 29 | (function($) { 30 | 31 | $.fn.pager = function(options) { 32 | 33 | var opts = $.extend({}, $.fn.pager.defaults, options); 34 | 35 | return this.each(function() { 36 | 37 | // empty out the destination element and then render out the pager with the supplied options 38 | $(this).empty().append(renderpager(parseInt(options.pagenumber), parseInt(options.pagecount), options.buttonClickCallback)); 39 | 40 | // specify correct cursor activity 41 | $('.pages li').mouseover(function() { document.body.style.cursor = "pointer"; }).mouseout(function() { document.body.style.cursor = "auto"; }); 42 | }); 43 | }; 44 | 45 | // render and return the pager with the supplied options 46 | function renderpager(pagenumber, pagecount, buttonClickCallback) { 47 | 48 | // setup $pager to hold render 49 | var $pager = $(''); 50 | 51 | // add in the previous and next buttons 52 | $pager.append(renderButton('首页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('上一页', pagenumber, pagecount, buttonClickCallback)); 53 | 54 | // pager currently only handles 10 viewable pages ( could be easily parameterized, maybe in next version ) so handle edge cases 55 | var startPoint = 1; 56 | var endPoint = 9; 57 | 58 | if (pagenumber > 4) { 59 | startPoint = pagenumber - 4; 60 | endPoint = pagenumber + 4; 61 | } 62 | 63 | if (endPoint > pagecount) { 64 | startPoint = pagecount - 8; 65 | endPoint = pagecount; 66 | } 67 | 68 | if (startPoint < 1) { 69 | startPoint = 1; 70 | } 71 | 72 | // loop thru visible pages and render buttons 73 | for (var page = startPoint; page <= endPoint; page++) { 74 | 75 | var currentButton = $('' + (page) + ''); 76 | 77 | page == pagenumber ? currentButton.addClass('pgCurrent') : currentButton.click(function() { buttonClickCallback(this.firstChild.data); }); 78 | currentButton.appendTo($pager); 79 | } 80 | 81 | // render in the next and last buttons before returning the whole rendered control back. 
82 | $pager.append(renderButton('下一页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('末页', pagenumber, pagecount, buttonClickCallback)); 83 | 84 | return $pager; 85 | } 86 | 87 | // renders and returns a 'specialized' button, ie 'next', 'previous' etc. rather than a page number button 88 | function renderButton(buttonLabel, pagenumber, pagecount, buttonClickCallback) { 89 | 90 | var $Button = $('' + buttonLabel + ''); 91 | 92 | var destPage = 1; 93 | 94 | // work out destination page for required button type 95 | switch (buttonLabel) { 96 | case "首页": 97 | destPage = 1; 98 | break; 99 | case "上一页": 100 | destPage = pagenumber - 1; 101 | break; 102 | case "下一页": 103 | destPage = pagenumber + 1; 104 | break; 105 | case "末页": 106 | destPage = pagecount; 107 | break; 108 | } 109 | 110 | // disable and 'grey' out buttons if not needed. 111 | if (buttonLabel == "首页" || buttonLabel == "上一页") { 112 | pagenumber <= 1 ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 113 | } 114 | else { 115 | pagenumber >= pagecount ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 116 | } 117 | 118 | return $Button; 119 | } 120 | 121 | // pager defaults. 
// Scrolls the window back towards the top-left corner in decaying steps.
//
//   acceleration - each step divides the current offset by (1 + acceleration); defaults to 0.1
//   time         - delay between steps in milliseconds; defaults to 16
//
// The function re-schedules itself until both scroll offsets have reached 0.
function goTop(acceleration, time)
{
    acceleration = acceleration || 0.1;
    time = time || 16;

    var x1 = 0;
    var y1 = 0;
    var x2 = 0;
    var y2 = 0;

    // Browsers report the scroll offset in different places; collect all of them
    // and use the largest value.
    if (document.documentElement)
    {
        x1 = document.documentElement.scrollLeft || 0;
        y1 = document.documentElement.scrollTop || 0;
    }
    if (document.body)
    {
        x2 = document.body.scrollLeft || 0;
        y2 = document.body.scrollTop || 0;
    }
    var x3 = window.scrollX || 0;
    var y3 = window.scrollY || 0;

    var x = Math.max(x1, x2, x3);
    var y = Math.max(y1, y2, y3);

    var speed = 1 + acceleration;
    window.scrollTo(Math.floor(x / speed), Math.floor(y / speed));

    if (x > 0 || y > 0)
    {
        // Schedule the next step with a closure instead of a code string, which
        // avoids the implicit eval and keeps the arguments as real values.
        window.setTimeout(function() { goTop(acceleration, time); }, time);
    }
}
articles = new ArrayList(); 57 | JestClient jestClient = getJestClient(); 58 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); 59 | // 构建查询 60 | if ("all".equals(field)) { 61 | searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString)); 62 | } else { 63 | searchSourceBuilder.query(QueryBuilders.termQuery(field, queryString)); 64 | // 设置排序 65 | // searchSourceBuilder.sort(field, "asc".equals(older) ? SortOrder.ASC : SortOrder.DESC); // 设置排序字段及排序顺序 66 | } 67 | // 设置高亮字段 68 | HighlightBuilder highlightBuilder = new HighlightBuilder(); 69 | highlightBuilder.field("title");//高亮title 70 | highlightBuilder.field("content");//高亮content 71 | highlightBuilder.field("author");//高亮author 72 | highlightBuilder.field("source");//高亮source 73 | highlightBuilder.preTags("").postTags("");//高亮标签 74 | highlightBuilder.fragmentSize(200);//高亮内容长度 75 | searchSourceBuilder.highlight(highlightBuilder); 76 | 77 | // 设置分页 78 | searchSourceBuilder.from((pageNumber - 1) * pageSize);//设置起始页 79 | searchSourceBuilder.size(pageSize);//设置页大小 80 | Search search = new Search.Builder(searchSourceBuilder.toString()) 81 | .addIndex("news")// 索引名称 82 | .build(); 83 | SearchResult result = jestClient.execute(search); 84 | 85 | // 手动解析 86 | // parseSearchResult(articles, result); 87 | 88 | // 自动解析 89 | JsonObject jsonObject = result.getJsonObject(); 90 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 91 | long took = jsonObject.get("took").getAsLong(); 92 | long total = hitsobject.get("total").getAsLong(); 93 | List> hits = result.getHits(Article.class); 94 | for (SearchResult.Hit hit : hits) { 95 | Article source = hit.source; 96 | //获取高亮后的内容 97 | Map> highlight = hit.highlight; 98 | List titlelist = highlight.get("title");//高亮后的title 99 | if (titlelist != null) { 100 | source.setTitle(titlelist.get(0)); 101 | } 102 | List contentlist = highlight.get("content");//高亮后的content 103 | if (contentlist != null) { 104 | source.setContent(contentlist.get(0)); 105 | } 
106 | List authorlist = highlight.get("author");//高亮后的author 107 | if (authorlist != null) { 108 | source.setAuthor(authorlist.get(0)); 109 | } 110 | List sourcelist = highlight.get("source");//高亮后的source 111 | if (sourcelist != null) { 112 | source.setSource(sourcelist.get(0)); 113 | } 114 | Article article = new Article(); 115 | article.setId(source.getId()); 116 | article.setTitle(source.getTitle()); 117 | article.setContent(source.getContent()); 118 | article.setSource(source.getSource()); 119 | article.setAuthor(source.getAuthor()); 120 | article.setUrl(source.getUrl()); 121 | article.setPubdate(source.getPubdate()); 122 | articles.add(article); 123 | } 124 | Map maps = new HashMap(); 125 | maps.put("articles", articles); 126 | maps.put("count", total); 127 | maps.put("took", took); 128 | return maps; 129 | } 130 | 131 | /** 132 | * 手动解析查询结果 133 | * @param articles 134 | * @param result 135 | * @throws ParseException 136 | */ 137 | private void parseSearchResult(List articles, SearchResult result) throws ParseException { 138 | JsonObject jsonObject = result.getJsonObject(); 139 | long took = jsonObject.get("took").getAsLong(); 140 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 141 | long total = hitsobject.get("total").getAsLong(); 142 | JsonArray jsonArray = hitsobject.getAsJsonArray("hits"); 143 | 144 | for (int i = 0; i < jsonArray.size(); i++) { 145 | JsonObject jsonHitsObject = jsonArray.get(i).getAsJsonObject(); 146 | 147 | // 获取返回字段 148 | JsonObject sourceObject = jsonHitsObject.get("_source").getAsJsonObject(); 149 | 150 | // 封装Article对象 151 | Article article = new Article(); 152 | article.setTitle(sourceObject.get("title").getAsString()); 153 | article.setContent(sourceObject.get("content").getAsString()); 154 | article.setSource(sourceObject.get("source").getAsString()); 155 | article.setAuthor(sourceObject.get("author").getAsString()); 156 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); 157 | 
article.setPubdate(sdf.parse(sourceObject.get("pubdate").getAsString())); 158 | article.setUrl(sourceObject.get("url").getAsString()); 159 | 160 | // 获取高亮字段 161 | JsonObject highlightObject = jsonHitsObject.get("highlight").getAsJsonObject(); 162 | String content = null; 163 | String title = null; 164 | String source = null; 165 | String author = null; 166 | if (highlightObject.get("content") != null) { 167 | content = highlightObject.get("content").getAsJsonArray().get(0).getAsString(); 168 | } 169 | if (highlightObject.get("title") != null) { 170 | title = highlightObject.get("title").getAsJsonArray().get(0).getAsString(); 171 | } 172 | if (highlightObject.get("source") != null) { 173 | source = highlightObject.get("source").getAsJsonArray().get(0).getAsString(); 174 | } 175 | if (highlightObject.get("author") != null) { 176 | author = highlightObject.get("author").getAsJsonArray().get(0).getAsString(); 177 | } 178 | if (content != null) { 179 | article.setContent(content); 180 | } 181 | if (title != null) { 182 | article.setTitle(title); 183 | } 184 | if (source != null) { 185 | article.setSource(source); 186 | } 187 | if (author != null) { 188 | article.setAuthor(author); 189 | } 190 | articles.add(article); 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/GovNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * 爬取中央政府网要闻、热点、部门新闻、地方报道、执法监管等新闻信息 13 | * Created by Administrator on 2015/9/10. 
14 | */ 15 | public class GovNewsPageProcesser implements PageProcessor { 16 | 17 | // 新闻列表 18 | private final static String URL_LIST = "http://new\\.sousuo\\.gov\\.cn\\/column\\/\\d+\\/\\d+\\.htm"; 19 | 20 | // 新闻列表页正文url 21 | private final static String URL_POST = "http://www\\.gov\\.cn/[\\w/-]+\\.htm"; 22 | 23 | private Site site = Site.me() 24 | .setRetryTimes(3) 25 | .setSleepTime(1000) 26 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"); 27 | 28 | public void process(Page page) { 29 | // 列表页 30 | if(page.getUrl().regex(URL_LIST).match()){ 31 | // 添加详情页请求链接 32 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 33 | // 添加列表页请求链接 34 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 35 | }else{// 详情页 36 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//div[@class='pages-title']").toString())); 37 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/table[@id='printContent']/tbody/tr/td").toString())); 38 | page.putField("source",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']/span[@class='font'][2]").toString().replace("来源: ", ""))); 39 | page.putField("author",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/div[@class='editor']").toString().replace("责任编辑: ", ""))); 40 | page.putField("create",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']").toString())); 41 | page.putField("url",page.getUrl().get()); 42 | 43 | String title = (String)page.getResultItems().get("title"); 44 | String content = (String)page.getResultItems().get("content"); 45 | String create = (String)page.getResultItems().get("create"); 46 | String source = (String)page.getResultItems().get("source"); 47 | String url = 
(String)page.getResultItems().get("url"); 48 | String author = (String)page.getResultItems().get("author"); 49 | 50 | // 创建article 51 | Article article = Utils.createArticle(title, content, source, author, url, create); 52 | 53 | // 索引 54 | 55 | Utils.index(article); 56 | 57 | } 58 | } 59 | 60 | public Site getSite() { 61 | return site; 62 | } 63 | 64 | /** 65 | * html字符过滤 66 | * @param str 67 | * @return 68 | */ 69 | private static String replaceHTML(String str){ 70 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 71 | } 72 | 73 | public static void main(String[] args) { 74 | 75 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 76 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 77 | Spider.create(new GovNewsPageProcesser()) 78 | .addUrl("http://new.sousuo.gov.cn/column/19769/0.htm") //要闻 79 | .addUrl("http://new.sousuo.gov.cn/column/16704/0.htm") //热点 80 | .addUrl("http://new.sousuo.gov.cn/column/16700/0.htm") //部门新闻 81 | .addUrl("http://new.sousuo.gov.cn/column/16699/0.htm") //地方报道 82 | .addUrl("http://new.sousuo.gov.cn/column/16697/0.htm") //执法监管 83 | .addUrl("http://new.sousuo.gov.cn/column/19423/0.htm") //国务院信息 84 | .addUrl("http://new.sousuo.gov.cn/column/16622/0.htm") //讲话 85 | .addUrl("http://new.sousuo.gov.cn/column/16623/0.htm") //会议 86 | .addUrl("http://new.sousuo.gov.cn/column/16621/0.htm") //活动 87 | .addUrl("http://new.sousuo.gov.cn/column/16620/0.htm") //出访 88 | .addUrl("http://new.sousuo.gov.cn/column/16740/0.htm") //专题信息-最新 89 | .addUrl("http://new.sousuo.gov.cn/column/16739/0.htm") //专题信息-聚焦 90 | .addUrl("http://new.sousuo.gov.cn/column/16743/0.htm") //事件 91 | .addUrl("http://new.sousuo.gov.cn/column/16744/0.htm") //预案 92 | .addUrl("http://new.sousuo.gov.cn/column/16742/0.htm") //工作 93 | .addUrl("http://new.sousuo.gov.cn/column/16765/0.htm") //政策法规解读-专家 94 | .addUrl("http://new.sousuo.gov.cn/column/16764/0.htm") //政策法规解读-媒体 95 | 
.addUrl("http://new.sousuo.gov.cn/column/17999/0.htm") //评论-要论 96 | .addUrl("http://new.sousuo.gov.cn/column/18000/0.htm") //评论-时评 97 | .addUrl("http://new.sousuo.gov.cn/column/18001/0.htm") //评论-网评 98 | .addUrl("http://new.sousuo.gov.cn/column/16852/0.htm") //数据要闻 99 | .addPipeline(jdbcPipeline) // 将抓取到的结果保存到数据库 100 | .thread(5) 101 | .run(); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/JdbcPipeline.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import java.text.ParseException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | /** 19 | * Created by Administrator on 2015/9/10. 
20 | */ 21 | @Component("jdbcPipeline") 22 | public class JdbcPipeline implements Pipeline { 23 | 24 | @Autowired 25 | ArticleDao articleDao; 26 | 27 | public void process(ResultItems resultItems, Task task) { 28 | Map items = resultItems.getAll(); 29 | if(resultItems!=null&&resultItems.getAll().size()>0){ 30 | Article article = new Article(); 31 | article.setTitle((String) items.get("title")); 32 | article.setContent((String) items.get("content")); 33 | article.setSource((String) items.get("source")); 34 | article.setAuthor((String) items.get("author")); 35 | article.setUrl((String)items.get("url")); 36 | String dataStr = (String)items.get("create"); 37 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 38 | Matcher matcher = pattern.matcher(dataStr); 39 | if(matcher.find()){ 40 | dataStr = matcher.group(0); 41 | } 42 | try { 43 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(dataStr)); 44 | } catch (ParseException e) { 45 | e.printStackTrace(); 46 | } 47 | articleDao.save(article); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/NeteaseNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/9 0009. 
13 | */ 14 | public class NeteaseNewsPageProcesser implements PageProcessor { 15 | 16 | private Site site = Site.me().setDomain("news.163.com") 17 | .setRetryTimes(3) 18 | .setSleepTime(1000) 19 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36"); 20 | 21 | public static final String URL_LIST = "http://news\\.163\\.com/special/\\w+/\\w+\\.html"; 22 | 23 | public static final String URL_POST = "http://news\\.163\\.com/.+\\.html"; 24 | 25 | public void process(Page page) { 26 | //列表页 27 | if (page.getUrl().regex(URL_LIST).match()||page.getUrl().regex("http://news\\.163\\.com/domestic").match()||page.getUrl().regex("http://news\\.163\\.com/shehui").match()) { 28 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 29 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 30 | }else{ 31 | 32 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//h1[@id='h1title']").toString())); 33 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@id='endText']").toString())); 34 | page.putField("create", Utils.replaceHTML(page.getHtml().xpath("//div[@class=\"ep-time-soure cDGray\"]").toString())); 35 | page.putField("source", Utils.replaceHTML(page.getHtml().xpath("//a[@id=\"ne_article_source\"]/text()").toString())); 36 | page.putField("url", page.getUrl().get()); 37 | 38 | String title = (String)page.getResultItems().get("title"); 39 | String content = (String)page.getResultItems().get("content"); 40 | String create = (String)page.getResultItems().get("create"); 41 | String source = (String)page.getResultItems().get("source"); 42 | String url = (String)page.getResultItems().get("url"); 43 | String author = ""; 44 | 45 | // 创建article 46 | Article article = Utils.createArticle(title, content, source, author, url, create); 47 | 48 | // 索引 49 | 50 | Utils.index(article); 51 | 52 | } 53 | } 54 | 55 | public Site getSite() { 56 | return 
site; 57 | } 58 | 59 | 60 | 61 | public static void main(String[] args) { 62 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 63 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 64 | Spider.create(new NeteaseNewsPageProcesser()) 65 | .addUrl("http://news.163.com/domestic") 66 | .addUrl("http://news.163.com/shehui") 67 | .addPipeline(jdbcPipeline) 68 | .thread(5) 69 | .run(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/Utils.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import net.aimeizi.client.elasticsearch.TransportClient; 5 | import net.aimeizi.model.Article; 6 | 7 | import java.text.SimpleDateFormat; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/11. 
13 | */ 14 | public class Utils { 15 | 16 | /** 17 | * 创建Article 18 | * @return 19 | */ 20 | public static Article createArticle(String title,String content,String source,String author,String url,String create){ 21 | Article article = new Article(); 22 | article.setTitle(title); 23 | article.setContent(content); 24 | article.setSource(source); 25 | article.setAuthor(author); 26 | try { 27 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 28 | Matcher matcher = pattern.matcher(create); 29 | if(matcher.find()){ 30 | create = matcher.group(0); 31 | } 32 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(create)); 33 | }catch (Exception e){ 34 | } 35 | article.setUrl(url); 36 | return article; 37 | } 38 | 39 | /** 40 | * 创建索引 41 | * @param article 42 | */ 43 | public static void index(Article article){ 44 | // 创建索引 45 | ObjectMapper mapper = new ObjectMapper(); 46 | try { 47 | TransportClient.createIndex("news", "article", mapper.writeValueAsString(article)); 48 | }catch (Exception e){ 49 | } 50 | } 51 | 52 | /** 53 | * html字符过滤 54 | * @param str 55 | * @return 56 | */ 57 | public static String replaceHTML(String str){ 58 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/resources/applicationContext.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/resources/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: elasticsearch -------------------------------------------------------------------------------- /src/main/resources/jdbc.properties: -------------------------------------------------------------------------------- 1 | 
jdbc.driverClassName=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/news 3 | jdbc.username=root 4 | jdbc.password=root -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=ERROR, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %c{1}:%L - %m%n 9 | 10 | # NodeChecker gets too chatty during test phase 11 | # log4j.category.io.searchbox.client.config.discovery.NodeChecker=FATAL 12 | 13 | # Same as above, gets too chatty for regular builds 14 | log4j.category.org.elasticsearch=WARN -------------------------------------------------------------------------------- /src/main/resources/sql.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `news` DEFAULT CHARACTER SET utf8; 2 | 3 | USE `news`; 4 | 5 | DROP TABLE IF EXISTS `news`; 6 | 7 | CREATE TABLE `news` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `title` varchar(100) DEFAULT NULL, 10 | `content` mediumtext, 11 | `url` varchar(100) DEFAULT NULL, 12 | `source` varchar(50) DEFAULT NULL, 13 | `author` varchar(50) DEFAULT NULL, 14 | `pubdate` datetime DEFAULT NULL, 15 | PRIMARY KEY (`id`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=112 DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/mvc-dispatcher-servlet.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
-------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/pages/search.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="utf-8" %> 2 | <%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt" %> 3 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> 4 | 5 | 6 | 7 | 8 | elasticsearch-jest 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 53 | 54 | 55 | 59 | 60 | 62 | 63 | 64 | 66 | 67 | 68 | 70 | 71 | 72 | 74 | 75 | 76 | 78 | 79 | 80 | 82 | 83 | 84 | 93 | 95 | 106 | 117 | 121 | 125 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | selected>全部 148 | selected>标题 149 | selected>内容 150 | selected>来源 151 | selected>作者 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | selected>升序 160 | selected>降序 161 | 162 | 163 | 164 | 165 | selected>10 166 | selected>20 167 | selected>50 168 | selected>100 169 | 170 | 171 | 172 | 搜索 173 | 174 | 175 | 176 | 177 | 178 | 搜索关键字${queryString}共检索到${count}条记录,共${totalPages}页,每页显示${pageSize}条。共耗时${took}毫秒。 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 没有要查询的内容 187 | 188 | 189 | 190 | ${article.title} 191 | ${article.content} 192 | 来源:${article.source} 编辑:${article.author} 发表日期: 193 | 194 | 195 | 196 | 197 | 198 | 199 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | elasticsearch-jest-example 7 | 8 | contextConfigLocation 9 | 10 | /WEB-INF/mvc-dispatcher-servlet.xml 11 | 12 | 13 | 14 | 15 | org.springframework.web.context.ContextLoaderListener 16 | 17 | 18 | CharacterEncodingFilter 19 | org.springframework.web.filter.CharacterEncodingFilter 20 | 21 | 
encoding 22 | UTF-8 23 | 24 | 25 | 26 | CharacterEncodingFilter 27 | /* 28 | 29 | 30 | 31 | mvc-dispatcher 32 | org.springframework.web.servlet.DispatcherServlet 33 | 34 | contextConfigLocation 35 | /WEB-INF/mvc-dispatcher-servlet.xml 36 | 37 | 1 38 | 39 | 40 | 41 | mvc-dispatcher 42 | / 43 | 44 | -------------------------------------------------------------------------------- /src/main/webapp/css/pager.css: -------------------------------------------------------------------------------- 1 | #pageNav{padding-left:386px;} 2 | #pageNav ul.pages { 3 | display:block; 4 | border:none; 5 | text-transform:uppercase; 6 | font-size:12px; 7 | margin:10px 0 50px; 8 | padding:0; 9 | } 10 | #pageNav ul.pages li { 11 | list-style:none; 12 | float:left; 13 | border:1px solid #9AAFE5; 14 | text-decoration:none; 15 | margin:0 5px 0 0; 16 | padding:5px; 17 | } 18 | #pageNav ul.pages li:hover { 19 | border:1px solid #003f7e; 20 | } 21 | #pageNav ul.pages li.pgEmpty { 22 | 23 | } 24 | #pageNav ul.pages li.pgCurrent { 25 | border:none; 26 | /*border:1px solid #003f7e;*/ 27 | color:#000; 28 | font-weight:700; 29 | /*background-color:#eee;*/ 30 | background: none repeat scroll 0 0 #2E6AB1; 31 | border: 1px solid #2E6AB1; 32 | color: #FFFFFF; 33 | font-weight: bold; 34 | } -------------------------------------------------------------------------------- /src/main/webapp/images/top.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v5tech/elasticsearch-jest-example/041008b96f219e5b9bad4d8a429faa44aa862cab/src/main/webapp/images/top.png -------------------------------------------------------------------------------- /src/main/webapp/js/jquery.pager.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery pager plugin 3 | * Version 1.0 (12/22/2008) 4 | * @requires jQuery v1.2.6 or later 5 | * 6 | * Example at: http://jonpauldavies.github.com/JQuery/Pager/PagerDemo.html 7 | * 8 
| * Copyright (c) 2008-2009 Jon Paul Davies 9 | * Dual licensed under the MIT and GPL licenses: 10 | * http://www.opensource.org/licenses/mit-license.php 11 | * http://www.gnu.org/licenses/gpl.html 12 | * 13 | * Read the related blog post and contact the author at http://www.j-dee.com/2008/12/22/jquery-pager-plugin/ 14 | * 15 | * This version is far from perfect and doesn't manage its own state, therefore contributions are more than welcome! 16 | * 17 | * Usage: .pager({ pagenumber: 1, pagecount: 15, buttonClickCallback: PagerClickTest }); 18 | * 19 | * Where pagenumber is the visible page number 20 | * pagecount is the total number of pages to display 21 | * buttonClickCallback is the method to fire when a pager button is clicked. 22 | * 23 | * buttonClickCallback signature is PagerClickTest = function(pageclickednumber) 24 | * Where pageclickednumber is the number of the page clicked in the control. 25 | * 26 | * The included Pager.CSS file is a dependency but can obviously be tweaked to your wishes 27 | * Tested in IE6 IE7 Firefox & Safari. Any browser strangeness, please report. 
28 | */ 29 | (function($) { 30 | 31 | $.fn.pager = function(options) { 32 | 33 | var opts = $.extend({}, $.fn.pager.defaults, options); 34 | 35 | return this.each(function() { 36 | 37 | // empty out the destination element and then render out the pager with the supplied options 38 | $(this).empty().append(renderpager(parseInt(options.pagenumber), parseInt(options.pagecount), options.buttonClickCallback)); 39 | 40 | // specify correct cursor activity 41 | $('.pages li').mouseover(function() { document.body.style.cursor = "pointer"; }).mouseout(function() { document.body.style.cursor = "auto"; }); 42 | }); 43 | }; 44 | 45 | // render and return the pager with the supplied options 46 | function renderpager(pagenumber, pagecount, buttonClickCallback) { 47 | 48 | // setup $pager to hold render 49 | var $pager = $(''); 50 | 51 | // add in the previous and next buttons 52 | $pager.append(renderButton('首页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('上一页', pagenumber, pagecount, buttonClickCallback)); 53 | 54 | // pager currently only handles 10 viewable pages ( could be easily parameterized, maybe in next version ) so handle edge cases 55 | var startPoint = 1; 56 | var endPoint = 9; 57 | 58 | if (pagenumber > 4) { 59 | startPoint = pagenumber - 4; 60 | endPoint = pagenumber + 4; 61 | } 62 | 63 | if (endPoint > pagecount) { 64 | startPoint = pagecount - 8; 65 | endPoint = pagecount; 66 | } 67 | 68 | if (startPoint < 1) { 69 | startPoint = 1; 70 | } 71 | 72 | // loop thru visible pages and render buttons 73 | for (var page = startPoint; page <= endPoint; page++) { 74 | 75 | var currentButton = $('' + (page) + ''); 76 | 77 | page == pagenumber ? currentButton.addClass('pgCurrent') : currentButton.click(function() { buttonClickCallback(this.firstChild.data); }); 78 | currentButton.appendTo($pager); 79 | } 80 | 81 | // render in the next and last buttons before returning the whole rendered control back. 
82 | $pager.append(renderButton('下一页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('末页', pagenumber, pagecount, buttonClickCallback)); 83 | 84 | return $pager; 85 | } 86 | 87 | // renders and returns a 'specialized' button, ie 'next', 'previous' etc. rather than a page number button 88 | function renderButton(buttonLabel, pagenumber, pagecount, buttonClickCallback) { 89 | 90 | var $Button = $('' + buttonLabel + ''); 91 | 92 | var destPage = 1; 93 | 94 | // work out destination page for required button type 95 | switch (buttonLabel) { 96 | case "首页": 97 | destPage = 1; 98 | break; 99 | case "上一页": 100 | destPage = pagenumber - 1; 101 | break; 102 | case "下一页": 103 | destPage = pagenumber + 1; 104 | break; 105 | case "末页": 106 | destPage = pagecount; 107 | break; 108 | } 109 | 110 | // disable and 'grey' out buttons if not needed. 111 | if (buttonLabel == "首页" || buttonLabel == "上一页") { 112 | pagenumber <= 1 ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 113 | } 114 | else { 115 | pagenumber >= pagecount ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 116 | } 117 | 118 | return $Button; 119 | } 120 | 121 | // pager defaults. 
hardly worth bothering with in this case but used as placeholder for expansion in the next version 122 | $.fn.pager.defaults = { 123 | pagenumber: 1, 124 | pagecount: 1 125 | }; 126 | 127 | })(jQuery); 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /src/main/webapp/js/top.js: -------------------------------------------------------------------------------- 1 | function goTop(acceleration, time) 2 | { 3 | acceleration = acceleration || 0.1; 4 | time = time || 16; 5 | 6 | var x1 = 0; 7 | var y1 = 0; 8 | var x2 = 0; 9 | var y2 = 0; 10 | var x3 = 0; 11 | var y3 = 0; 12 | 13 | if (document.documentElement) 14 | { 15 | x1 = document.documentElement.scrollLeft || 0; 16 | y1 = document.documentElement.scrollTop || 0; 17 | } 18 | if (document.body) 19 | { 20 | x2 = document.body.scrollLeft || 0; 21 | y2 = document.body.scrollTop || 0; 22 | } 23 | var x3 = window.scrollX || 0; 24 | var y3 = window.scrollY || 0; 25 | 26 | var x = Math.max(x1, Math.max(x2, x3)); 27 | var y = Math.max(y1, Math.max(y2, y3)); 28 | 29 | var speed = 1 + acceleration; 30 | window.scrollTo(Math.floor(x / speed), Math.floor(y / speed)); 31 | 32 | if(x > 0 || y > 0) 33 | { 34 | var invokeFunction = "goTop(" + acceleration + ", " + time + ")"; 35 | window.setTimeout(invokeFunction, time); 36 | } 37 | } --------------------------------------------------------------------------------
(); 57 | JestClient jestClient = getJestClient(); 58 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); 59 | // 构建查询 60 | if ("all".equals(field)) { 61 | searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString)); 62 | } else { 63 | searchSourceBuilder.query(QueryBuilders.termQuery(field, queryString)); 64 | // 设置排序 65 | // searchSourceBuilder.sort(field, "asc".equals(older) ? SortOrder.ASC : SortOrder.DESC); // 设置排序字段及排序顺序 66 | } 67 | // 设置高亮字段 68 | HighlightBuilder highlightBuilder = new HighlightBuilder(); 69 | highlightBuilder.field("title");//高亮title 70 | highlightBuilder.field("content");//高亮content 71 | highlightBuilder.field("author");//高亮author 72 | highlightBuilder.field("source");//高亮source 73 | highlightBuilder.preTags("").postTags("");//高亮标签 74 | highlightBuilder.fragmentSize(200);//高亮内容长度 75 | searchSourceBuilder.highlight(highlightBuilder); 76 | 77 | // 设置分页 78 | searchSourceBuilder.from((pageNumber - 1) * pageSize);//设置起始页 79 | searchSourceBuilder.size(pageSize);//设置页大小 80 | Search search = new Search.Builder(searchSourceBuilder.toString()) 81 | .addIndex("news")// 索引名称 82 | .build(); 83 | SearchResult result = jestClient.execute(search); 84 | 85 | // 手动解析 86 | // parseSearchResult(articles, result); 87 | 88 | // 自动解析 89 | JsonObject jsonObject = result.getJsonObject(); 90 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 91 | long took = jsonObject.get("took").getAsLong(); 92 | long total = hitsobject.get("total").getAsLong(); 93 | List> hits = result.getHits(Article.class); 94 | for (SearchResult.Hit hit : hits) { 95 | Article source = hit.source; 96 | //获取高亮后的内容 97 | Map> highlight = hit.highlight; 98 | List titlelist = highlight.get("title");//高亮后的title 99 | if (titlelist != null) { 100 | source.setTitle(titlelist.get(0)); 101 | } 102 | List contentlist = highlight.get("content");//高亮后的content 103 | if (contentlist != null) { 104 | source.setContent(contentlist.get(0)); 105 | } 106 | List authorlist = 
highlight.get("author");//高亮后的author 107 | if (authorlist != null) { 108 | source.setAuthor(authorlist.get(0)); 109 | } 110 | List sourcelist = highlight.get("source");//高亮后的source 111 | if (sourcelist != null) { 112 | source.setSource(sourcelist.get(0)); 113 | } 114 | Article article = new Article(); 115 | article.setId(source.getId()); 116 | article.setTitle(source.getTitle()); 117 | article.setContent(source.getContent()); 118 | article.setSource(source.getSource()); 119 | article.setAuthor(source.getAuthor()); 120 | article.setUrl(source.getUrl()); 121 | article.setPubdate(source.getPubdate()); 122 | articles.add(article); 123 | } 124 | Map maps = new HashMap(); 125 | maps.put("articles", articles); 126 | maps.put("count", total); 127 | maps.put("took", took); 128 | return maps; 129 | } 130 | 131 | /** 132 | * 手动解析查询结果 133 | * @param articles 134 | * @param result 135 | * @throws ParseException 136 | */ 137 | private void parseSearchResult(List articles, SearchResult result) throws ParseException { 138 | JsonObject jsonObject = result.getJsonObject(); 139 | long took = jsonObject.get("took").getAsLong(); 140 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 141 | long total = hitsobject.get("total").getAsLong(); 142 | JsonArray jsonArray = hitsobject.getAsJsonArray("hits"); 143 | 144 | for (int i = 0; i < jsonArray.size(); i++) { 145 | JsonObject jsonHitsObject = jsonArray.get(i).getAsJsonObject(); 146 | 147 | // 获取返回字段 148 | JsonObject sourceObject = jsonHitsObject.get("_source").getAsJsonObject(); 149 | 150 | // 封装Article对象 151 | Article article = new Article(); 152 | article.setTitle(sourceObject.get("title").getAsString()); 153 | article.setContent(sourceObject.get("content").getAsString()); 154 | article.setSource(sourceObject.get("source").getAsString()); 155 | article.setAuthor(sourceObject.get("author").getAsString()); 156 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); 157 | 
article.setPubdate(sdf.parse(sourceObject.get("pubdate").getAsString())); 158 | article.setUrl(sourceObject.get("url").getAsString()); 159 | 160 | // 获取高亮字段 161 | JsonObject highlightObject = jsonHitsObject.get("highlight").getAsJsonObject(); 162 | String content = null; 163 | String title = null; 164 | String source = null; 165 | String author = null; 166 | if (highlightObject.get("content") != null) { 167 | content = highlightObject.get("content").getAsJsonArray().get(0).getAsString(); 168 | } 169 | if (highlightObject.get("title") != null) { 170 | title = highlightObject.get("title").getAsJsonArray().get(0).getAsString(); 171 | } 172 | if (highlightObject.get("source") != null) { 173 | source = highlightObject.get("source").getAsJsonArray().get(0).getAsString(); 174 | } 175 | if (highlightObject.get("author") != null) { 176 | author = highlightObject.get("author").getAsJsonArray().get(0).getAsString(); 177 | } 178 | if (content != null) { 179 | article.setContent(content); 180 | } 181 | if (title != null) { 182 | article.setTitle(title); 183 | } 184 | if (source != null) { 185 | article.setSource(source); 186 | } 187 | if (author != null) { 188 | article.setAuthor(author); 189 | } 190 | articles.add(article); 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/GovNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * 爬取中央政府网要闻、热点、部门新闻、地方报道、执法监管等新闻信息 13 | * Created by Administrator on 2015/9/10. 
14 | */ 15 | public class GovNewsPageProcesser implements PageProcessor { 16 | 17 | // 新闻列表 18 | private final static String URL_LIST = "http://new\\.sousuo\\.gov\\.cn\\/column\\/\\d+\\/\\d+\\.htm"; 19 | 20 | // 新闻列表页正文url 21 | private final static String URL_POST = "http://www\\.gov\\.cn/[\\w/-]+\\.htm"; 22 | 23 | private Site site = Site.me() 24 | .setRetryTimes(3) 25 | .setSleepTime(1000) 26 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"); 27 | 28 | public void process(Page page) { 29 | // 列表页 30 | if(page.getUrl().regex(URL_LIST).match()){ 31 | // 添加详情页请求链接 32 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 33 | // 添加列表页请求链接 34 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 35 | }else{// 详情页 36 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//div[@class='pages-title']").toString())); 37 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/table[@id='printContent']/tbody/tr/td").toString())); 38 | page.putField("source",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']/span[@class='font'][2]").toString().replace("来源: ", ""))); 39 | page.putField("author",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/div[@class='editor']").toString().replace("责任编辑: ", ""))); 40 | page.putField("create",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']").toString())); 41 | page.putField("url",page.getUrl().get()); 42 | 43 | String title = (String)page.getResultItems().get("title"); 44 | String content = (String)page.getResultItems().get("content"); 45 | String create = (String)page.getResultItems().get("create"); 46 | String source = (String)page.getResultItems().get("source"); 47 | String url = 
(String)page.getResultItems().get("url"); 48 | String author = (String)page.getResultItems().get("author"); 49 | 50 | // 创建article 51 | Article article = Utils.createArticle(title, content, source, author, url, create); 52 | 53 | // 索引 54 | 55 | Utils.index(article); 56 | 57 | } 58 | } 59 | 60 | public Site getSite() { 61 | return site; 62 | } 63 | 64 | /** 65 | * html字符过滤 66 | * @param str 67 | * @return 68 | */ 69 | private static String replaceHTML(String str){ 70 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 71 | } 72 | 73 | public static void main(String[] args) { 74 | 75 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 76 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 77 | Spider.create(new GovNewsPageProcesser()) 78 | .addUrl("http://new.sousuo.gov.cn/column/19769/0.htm") //要闻 79 | .addUrl("http://new.sousuo.gov.cn/column/16704/0.htm") //热点 80 | .addUrl("http://new.sousuo.gov.cn/column/16700/0.htm") //部门新闻 81 | .addUrl("http://new.sousuo.gov.cn/column/16699/0.htm") //地方报道 82 | .addUrl("http://new.sousuo.gov.cn/column/16697/0.htm") //执法监管 83 | .addUrl("http://new.sousuo.gov.cn/column/19423/0.htm") //国务院信息 84 | .addUrl("http://new.sousuo.gov.cn/column/16622/0.htm") //讲话 85 | .addUrl("http://new.sousuo.gov.cn/column/16623/0.htm") //会议 86 | .addUrl("http://new.sousuo.gov.cn/column/16621/0.htm") //活动 87 | .addUrl("http://new.sousuo.gov.cn/column/16620/0.htm") //出访 88 | .addUrl("http://new.sousuo.gov.cn/column/16740/0.htm") //专题信息-最新 89 | .addUrl("http://new.sousuo.gov.cn/column/16739/0.htm") //专题信息-聚焦 90 | .addUrl("http://new.sousuo.gov.cn/column/16743/0.htm") //事件 91 | .addUrl("http://new.sousuo.gov.cn/column/16744/0.htm") //预案 92 | .addUrl("http://new.sousuo.gov.cn/column/16742/0.htm") //工作 93 | .addUrl("http://new.sousuo.gov.cn/column/16765/0.htm") //政策法规解读-专家 94 | .addUrl("http://new.sousuo.gov.cn/column/16764/0.htm") //政策法规解读-媒体 95 | 
.addUrl("http://new.sousuo.gov.cn/column/17999/0.htm") //评论-要论 96 | .addUrl("http://new.sousuo.gov.cn/column/18000/0.htm") //评论-时评 97 | .addUrl("http://new.sousuo.gov.cn/column/18001/0.htm") //评论-网评 98 | .addUrl("http://new.sousuo.gov.cn/column/16852/0.htm") //数据要闻 99 | .addPipeline(jdbcPipeline) // 将抓取到的结果保存到数据库 100 | .thread(5) 101 | .run(); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/JdbcPipeline.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import java.text.ParseException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | /** 19 | * Created by Administrator on 2015/9/10. 
20 | */ 21 | @Component("jdbcPipeline") 22 | public class JdbcPipeline implements Pipeline { 23 | 24 | @Autowired 25 | ArticleDao articleDao; 26 | 27 | public void process(ResultItems resultItems, Task task) { 28 | Map items = resultItems.getAll(); 29 | if(resultItems!=null&&resultItems.getAll().size()>0){ 30 | Article article = new Article(); 31 | article.setTitle((String) items.get("title")); 32 | article.setContent((String) items.get("content")); 33 | article.setSource((String) items.get("source")); 34 | article.setAuthor((String) items.get("author")); 35 | article.setUrl((String)items.get("url")); 36 | String dataStr = (String)items.get("create"); 37 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 38 | Matcher matcher = pattern.matcher(dataStr); 39 | if(matcher.find()){ 40 | dataStr = matcher.group(0); 41 | } 42 | try { 43 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(dataStr)); 44 | } catch (ParseException e) { 45 | e.printStackTrace(); 46 | } 47 | articleDao.save(article); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/NeteaseNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/9 0009. 
13 | */ 14 | public class NeteaseNewsPageProcesser implements PageProcessor { 15 | 16 | private Site site = Site.me().setDomain("news.163.com") 17 | .setRetryTimes(3) 18 | .setSleepTime(1000) 19 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36"); 20 | 21 | public static final String URL_LIST = "http://news\\.163\\.com/special/\\w+/\\w+\\.html"; 22 | 23 | public static final String URL_POST = "http://news\\.163\\.com/.+\\.html"; 24 | 25 | public void process(Page page) { 26 | //列表页 27 | if (page.getUrl().regex(URL_LIST).match()||page.getUrl().regex("http://news\\.163\\.com/domestic").match()||page.getUrl().regex("http://news\\.163\\.com/shehui").match()) { 28 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 29 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 30 | }else{ 31 | 32 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//h1[@id='h1title']").toString())); 33 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@id='endText']").toString())); 34 | page.putField("create", Utils.replaceHTML(page.getHtml().xpath("//div[@class=\"ep-time-soure cDGray\"]").toString())); 35 | page.putField("source", Utils.replaceHTML(page.getHtml().xpath("//a[@id=\"ne_article_source\"]/text()").toString())); 36 | page.putField("url", page.getUrl().get()); 37 | 38 | String title = (String)page.getResultItems().get("title"); 39 | String content = (String)page.getResultItems().get("content"); 40 | String create = (String)page.getResultItems().get("create"); 41 | String source = (String)page.getResultItems().get("source"); 42 | String url = (String)page.getResultItems().get("url"); 43 | String author = ""; 44 | 45 | // 创建article 46 | Article article = Utils.createArticle(title, content, source, author, url, create); 47 | 48 | // 索引 49 | 50 | Utils.index(article); 51 | 52 | } 53 | } 54 | 55 | public Site getSite() { 56 | return 
site; 57 | } 58 | 59 | 60 | 61 | public static void main(String[] args) { 62 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 63 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 64 | Spider.create(new NeteaseNewsPageProcesser()) 65 | .addUrl("http://news.163.com/domestic") 66 | .addUrl("http://news.163.com/shehui") 67 | .addPipeline(jdbcPipeline) 68 | .thread(5) 69 | .run(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/Utils.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import net.aimeizi.client.elasticsearch.TransportClient; 5 | import net.aimeizi.model.Article; 6 | 7 | import java.text.SimpleDateFormat; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/11. 
*
* <p>Static helpers shared by the page processors: building an {@link Article} from
* extracted text, sending it to the search index, and stripping markup from extracted HTML.
*/
public class Utils {

    /** Locates a {@code yyyy-MM-dd HH:mm} timestamp inside free text; compiled once. */
    private static final Pattern TIMESTAMP = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

    /** JSON serializer, created once instead of on every {@link #index} call. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Builds an article from extracted field values.
     *
     * @param title   article title
     * @param content article body
     * @param source  publishing source
     * @param author  author or editor
     * @param url     page URL
     * @param create  text containing the publication time; the first
     *                {@code yyyy-MM-dd HH:mm} occurrence is used when there is one
     * @return the populated article; its publication date stays unset when {@code create}
     *         is null or cannot be parsed
     */
    public static Article createArticle(String title,String content,String source,String author,String url,String create){
        Article article = new Article();
        article.setTitle(title);
        article.setContent(content);
        article.setSource(source);
        article.setAuthor(author);
        article.setUrl(url);

        // A missing date is an expected case, so it is checked for explicitly instead of
        // relying on a catch-all that also hid every other failure.
        if (create != null) {
            Matcher matcher = TIMESTAMP.matcher(create);
            // as before, fall back to the whole text when no timestamp is found in it
            String candidate = matcher.find() ? matcher.group(0) : create;
            try {
                article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(candidate));
            } catch (java.text.ParseException ignored) {
                // best effort: an unparsable date leaves the publication date unset
            }
        }
        return article;
    }

    /**
     * Sends the article, serialized as JSON, to index "news" with type "article".
     * Indexing is best effort: a failure is reported on stderr and not propagated, so the
     * caller keeps running.
     *
     * @param article article to index; null is ignored
     */
    public static void index(Article article){
        if (article == null) {
            return;
        }
        try {
            TransportClient.createIndex("news", "article", MAPPER.writeValueAsString(article));
        } catch (Exception e) {
            // previously swallowed without a trace, which made lost documents invisible
            System.err.println("Failed to index article " + article.getUrl() + ": " + e);
        }
    }

    /**
     * Strips markup from an extracted HTML fragment.
     *
     * <p>Removes every {@code <...>} tag and then every occurrence of the second pattern.
     * NOTE(review): that pattern is a single space in this copy of the file; it may have
     * been an {@code &nbsp;} entity originally -- confirm against the repository.
     *
     * @param str raw fragment, may be null
     * @return the stripped text, or an empty string when {@code str} is null
     */
    public static String replaceHTML(String str){
        return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):"";
    }
}

--------------------------------------------------------------------------------
/src/main/resources/applicationContext.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/main/resources/elasticsearch.yml:
--------------------------------------------------------------------------------
cluster.name: elasticsearch
--------------------------------------------------------------------------------
/src/main/resources/jdbc.properties:
--------------------------------------------------------------------------------
jdbc.driverClassName=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/news 3 | jdbc.username=root 4 | jdbc.password=root -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=ERROR, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %c{1}:%L - %m%n 9 | 10 | # NodeChecker gets too chatty during test phase 11 | # log4j.category.io.searchbox.client.config.discovery.NodeChecker=FATAL 12 | 13 | # Same as above, gets too chatty for regular builds 14 | log4j.category.org.elasticsearch=WARN -------------------------------------------------------------------------------- /src/main/resources/sql.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `news` DEFAULT CHARACTER SET utf8; 2 | 3 | USE `news`; 4 | 5 | DROP TABLE IF EXISTS `news`; 6 | 7 | CREATE TABLE `news` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `title` varchar(100) DEFAULT NULL, 10 | `content` mediumtext, 11 | `url` varchar(100) DEFAULT NULL, 12 | `source` varchar(50) DEFAULT NULL, 13 | `author` varchar(50) DEFAULT NULL, 14 | `pubdate` datetime DEFAULT NULL, 15 | PRIMARY KEY (`id`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=112 DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/mvc-dispatcher-servlet.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
-------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/pages/search.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="utf-8" %> 2 | <%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt" %> 3 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> 4 | 5 | 6 | 7 | 8 | elasticsearch-jest 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 53 | 54 | 55 | 59 | 60 | 62 | 63 | 64 | 66 | 67 | 68 | 70 | 71 | 72 | 74 | 75 | 76 | 78 | 79 | 80 | 82 | 83 | 84 | 93 | 95 | 106 | 117 | 121 | 125 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | selected>全部 148 | selected>标题 149 | selected>内容 150 | selected>来源 151 | selected>作者 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | selected>升序 160 | selected>降序 161 | 162 | 163 | 164 | 165 | selected>10 166 | selected>20 167 | selected>50 168 | selected>100 169 | 170 | 171 | 172 | 搜索 173 | 174 | 175 | 176 | 177 | 178 | 搜索关键字${queryString}共检索到${count}条记录,共${totalPages}页,每页显示${pageSize}条。共耗时${took}毫秒。 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 没有要查询的内容 187 | 188 | 189 | 190 | ${article.title} 191 | ${article.content} 192 | 来源:${article.source} 编辑:${article.author} 发表日期: 193 | 194 | 195 | 196 | 197 | 198 | 199 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | elasticsearch-jest-example 7 | 8 | contextConfigLocation 9 | 10 | /WEB-INF/mvc-dispatcher-servlet.xml 11 | 12 | 13 | 14 | 15 | org.springframework.web.context.ContextLoaderListener 16 | 17 | 18 | CharacterEncodingFilter 19 | org.springframework.web.filter.CharacterEncodingFilter 20 | 21 | 
encoding 22 | UTF-8 23 | 24 | 25 | 26 | CharacterEncodingFilter 27 | /* 28 | 29 | 30 | 31 | mvc-dispatcher 32 | org.springframework.web.servlet.DispatcherServlet 33 | 34 | contextConfigLocation 35 | /WEB-INF/mvc-dispatcher-servlet.xml 36 | 37 | 1 38 | 39 | 40 | 41 | mvc-dispatcher 42 | / 43 | 44 | -------------------------------------------------------------------------------- /src/main/webapp/css/pager.css: -------------------------------------------------------------------------------- 1 | #pageNav{padding-left:386px;} 2 | #pageNav ul.pages { 3 | display:block; 4 | border:none; 5 | text-transform:uppercase; 6 | font-size:12px; 7 | margin:10px 0 50px; 8 | padding:0; 9 | } 10 | #pageNav ul.pages li { 11 | list-style:none; 12 | float:left; 13 | border:1px solid #9AAFE5; 14 | text-decoration:none; 15 | margin:0 5px 0 0; 16 | padding:5px; 17 | } 18 | #pageNav ul.pages li:hover { 19 | border:1px solid #003f7e; 20 | } 21 | #pageNav ul.pages li.pgEmpty { 22 | 23 | } 24 | #pageNav ul.pages li.pgCurrent { 25 | border:none; 26 | /*border:1px solid #003f7e;*/ 27 | color:#000; 28 | font-weight:700; 29 | /*background-color:#eee;*/ 30 | background: none repeat scroll 0 0 #2E6AB1; 31 | border: 1px solid #2E6AB1; 32 | color: #FFFFFF; 33 | font-weight: bold; 34 | } -------------------------------------------------------------------------------- /src/main/webapp/images/top.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v5tech/elasticsearch-jest-example/041008b96f219e5b9bad4d8a429faa44aa862cab/src/main/webapp/images/top.png -------------------------------------------------------------------------------- /src/main/webapp/js/jquery.pager.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery pager plugin 3 | * Version 1.0 (12/22/2008) 4 | * @requires jQuery v1.2.6 or later 5 | * 6 | * Example at: http://jonpauldavies.github.com/JQuery/Pager/PagerDemo.html 7 | * 8 
| * Copyright (c) 2008-2009 Jon Paul Davies 9 | * Dual licensed under the MIT and GPL licenses: 10 | * http://www.opensource.org/licenses/mit-license.php 11 | * http://www.gnu.org/licenses/gpl.html 12 | * 13 | * Read the related blog post and contact the author at http://www.j-dee.com/2008/12/22/jquery-pager-plugin/ 14 | * 15 | * This version is far from perfect and doesn't manage its own state, therefore contributions are more than welcome! 16 | * 17 | * Usage: .pager({ pagenumber: 1, pagecount: 15, buttonClickCallback: PagerClickTest }); 18 | * 19 | * Where pagenumber is the visible page number 20 | * pagecount is the total number of pages to display 21 | * buttonClickCallback is the method to fire when a pager button is clicked. 22 | * 23 | * buttonClickCallback signature is PagerClickTest = function(pageclickednumber) 24 | * Where pageclickednumber is the number of the page clicked in the control. 25 | * 26 | * The included Pager.CSS file is a dependency but can obviously be tweaked to your wishes 27 | * Tested in IE6 IE7 Firefox & Safari. Any browser strangeness, please report. 
28 | */ 29 | (function($) { 30 | 31 | $.fn.pager = function(options) { 32 | 33 | var opts = $.extend({}, $.fn.pager.defaults, options); 34 | 35 | return this.each(function() { 36 | 37 | // empty out the destination element and then render out the pager with the supplied options 38 | $(this).empty().append(renderpager(parseInt(options.pagenumber), parseInt(options.pagecount), options.buttonClickCallback)); 39 | 40 | // specify correct cursor activity 41 | $('.pages li').mouseover(function() { document.body.style.cursor = "pointer"; }).mouseout(function() { document.body.style.cursor = "auto"; }); 42 | }); 43 | }; 44 | 45 | // render and return the pager with the supplied options 46 | function renderpager(pagenumber, pagecount, buttonClickCallback) { 47 | 48 | // setup $pager to hold render 49 | var $pager = $(''); 50 | 51 | // add in the previous and next buttons 52 | $pager.append(renderButton('首页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('上一页', pagenumber, pagecount, buttonClickCallback)); 53 | 54 | // pager currently only handles 10 viewable pages ( could be easily parameterized, maybe in next version ) so handle edge cases 55 | var startPoint = 1; 56 | var endPoint = 9; 57 | 58 | if (pagenumber > 4) { 59 | startPoint = pagenumber - 4; 60 | endPoint = pagenumber + 4; 61 | } 62 | 63 | if (endPoint > pagecount) { 64 | startPoint = pagecount - 8; 65 | endPoint = pagecount; 66 | } 67 | 68 | if (startPoint < 1) { 69 | startPoint = 1; 70 | } 71 | 72 | // loop thru visible pages and render buttons 73 | for (var page = startPoint; page <= endPoint; page++) { 74 | 75 | var currentButton = $('' + (page) + ''); 76 | 77 | page == pagenumber ? currentButton.addClass('pgCurrent') : currentButton.click(function() { buttonClickCallback(this.firstChild.data); }); 78 | currentButton.appendTo($pager); 79 | } 80 | 81 | // render in the next and last buttons before returning the whole rendered control back. 
82 | $pager.append(renderButton('下一页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('末页', pagenumber, pagecount, buttonClickCallback)); 83 | 84 | return $pager; 85 | } 86 | 87 | // renders and returns a 'specialized' button, ie 'next', 'previous' etc. rather than a page number button 88 | function renderButton(buttonLabel, pagenumber, pagecount, buttonClickCallback) { 89 | 90 | var $Button = $('' + buttonLabel + ''); 91 | 92 | var destPage = 1; 93 | 94 | // work out destination page for required button type 95 | switch (buttonLabel) { 96 | case "首页": 97 | destPage = 1; 98 | break; 99 | case "上一页": 100 | destPage = pagenumber - 1; 101 | break; 102 | case "下一页": 103 | destPage = pagenumber + 1; 104 | break; 105 | case "末页": 106 | destPage = pagecount; 107 | break; 108 | } 109 | 110 | // disable and 'grey' out buttons if not needed. 111 | if (buttonLabel == "首页" || buttonLabel == "上一页") { 112 | pagenumber <= 1 ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 113 | } 114 | else { 115 | pagenumber >= pagecount ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 116 | } 117 | 118 | return $Button; 119 | } 120 | 121 | // pager defaults. 
hardly worth bothering with in this case but used as placeholder for expansion in the next version 122 | $.fn.pager.defaults = { 123 | pagenumber: 1, 124 | pagecount: 1 125 | }; 126 | 127 | })(jQuery); 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /src/main/webapp/js/top.js: -------------------------------------------------------------------------------- 1 | function goTop(acceleration, time) 2 | { 3 | acceleration = acceleration || 0.1; 4 | time = time || 16; 5 | 6 | var x1 = 0; 7 | var y1 = 0; 8 | var x2 = 0; 9 | var y2 = 0; 10 | var x3 = 0; 11 | var y3 = 0; 12 | 13 | if (document.documentElement) 14 | { 15 | x1 = document.documentElement.scrollLeft || 0; 16 | y1 = document.documentElement.scrollTop || 0; 17 | } 18 | if (document.body) 19 | { 20 | x2 = document.body.scrollLeft || 0; 21 | y2 = document.body.scrollTop || 0; 22 | } 23 | var x3 = window.scrollX || 0; 24 | var y3 = window.scrollY || 0; 25 | 26 | var x = Math.max(x1, Math.max(x2, x3)); 27 | var y = Math.max(y1, Math.max(y2, y3)); 28 | 29 | var speed = 1 + acceleration; 30 | window.scrollTo(Math.floor(x / speed), Math.floor(y / speed)); 31 | 32 | if(x > 0 || y > 0) 33 | { 34 | var invokeFunction = "goTop(" + acceleration + ", " + time + ")"; 35 | window.setTimeout(invokeFunction, time); 36 | } 37 | } --------------------------------------------------------------------------------
hit : hits) { 95 | Article source = hit.source; 96 | //获取高亮后的内容 97 | Map> highlight = hit.highlight; 98 | List titlelist = highlight.get("title");//高亮后的title 99 | if (titlelist != null) { 100 | source.setTitle(titlelist.get(0)); 101 | } 102 | List contentlist = highlight.get("content");//高亮后的content 103 | if (contentlist != null) { 104 | source.setContent(contentlist.get(0)); 105 | } 106 | List authorlist = highlight.get("author");//高亮后的author 107 | if (authorlist != null) { 108 | source.setAuthor(authorlist.get(0)); 109 | } 110 | List sourcelist = highlight.get("source");//高亮后的source 111 | if (sourcelist != null) { 112 | source.setSource(sourcelist.get(0)); 113 | } 114 | Article article = new Article(); 115 | article.setId(source.getId()); 116 | article.setTitle(source.getTitle()); 117 | article.setContent(source.getContent()); 118 | article.setSource(source.getSource()); 119 | article.setAuthor(source.getAuthor()); 120 | article.setUrl(source.getUrl()); 121 | article.setPubdate(source.getPubdate()); 122 | articles.add(article); 123 | } 124 | Map maps = new HashMap(); 125 | maps.put("articles", articles); 126 | maps.put("count", total); 127 | maps.put("took", took); 128 | return maps; 129 | } 130 | 131 | /** 132 | * 手动解析查询结果 133 | * @param articles 134 | * @param result 135 | * @throws ParseException 136 | */ 137 | private void parseSearchResult(List articles, SearchResult result) throws ParseException { 138 | JsonObject jsonObject = result.getJsonObject(); 139 | long took = jsonObject.get("took").getAsLong(); 140 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 141 | long total = hitsobject.get("total").getAsLong(); 142 | JsonArray jsonArray = hitsobject.getAsJsonArray("hits"); 143 | 144 | for (int i = 0; i < jsonArray.size(); i++) { 145 | JsonObject jsonHitsObject = jsonArray.get(i).getAsJsonObject(); 146 | 147 | // 获取返回字段 148 | JsonObject sourceObject = jsonHitsObject.get("_source").getAsJsonObject(); 149 | 150 | // 封装Article对象 151 | Article 
article = new Article(); 152 | article.setTitle(sourceObject.get("title").getAsString()); 153 | article.setContent(sourceObject.get("content").getAsString()); 154 | article.setSource(sourceObject.get("source").getAsString()); 155 | article.setAuthor(sourceObject.get("author").getAsString()); 156 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); 157 | article.setPubdate(sdf.parse(sourceObject.get("pubdate").getAsString())); 158 | article.setUrl(sourceObject.get("url").getAsString()); 159 | 160 | // 获取高亮字段 161 | JsonObject highlightObject = jsonHitsObject.get("highlight").getAsJsonObject(); 162 | String content = null; 163 | String title = null; 164 | String source = null; 165 | String author = null; 166 | if (highlightObject.get("content") != null) { 167 | content = highlightObject.get("content").getAsJsonArray().get(0).getAsString(); 168 | } 169 | if (highlightObject.get("title") != null) { 170 | title = highlightObject.get("title").getAsJsonArray().get(0).getAsString(); 171 | } 172 | if (highlightObject.get("source") != null) { 173 | source = highlightObject.get("source").getAsJsonArray().get(0).getAsString(); 174 | } 175 | if (highlightObject.get("author") != null) { 176 | author = highlightObject.get("author").getAsJsonArray().get(0).getAsString(); 177 | } 178 | if (content != null) { 179 | article.setContent(content); 180 | } 181 | if (title != null) { 182 | article.setTitle(title); 183 | } 184 | if (source != null) { 185 | article.setSource(source); 186 | } 187 | if (author != null) { 188 | article.setAuthor(author); 189 | } 190 | articles.add(article); 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/GovNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 
import org.springframework.context.support.ClassPathXmlApplicationContext;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Crawls news from the central government portal: top news, hot topics, ministry news,
 * local reports, law-enforcement and supervision news, and the other columns seeded in {@link #main}.
 * Created by Administrator on 2015/9/10.
 */
public class GovNewsPageProcesser implements PageProcessor {

    // News list (column) pages
    private final static String URL_LIST = "http://new\\.sousuo\\.gov\\.cn\\/column\\/\\d+\\/\\d+\\.htm";

    // Article pages linked from the list pages
    private final static String URL_POST = "http://www\\.gov\\.cn/[\\w/-]+\\.htm";

    private final Site site = Site.me()
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36");

    /**
     * Handles one downloaded page. A list page only contributes further links (articles
     * and more list pages); any other page is treated as an article: its fields are
     * extracted, stored on the page for the pipelines, and the article is indexed.
     *
     * @param page the downloaded page
     */
    public void process(Page page) {
        // List page: queue the article links and the further list-page links.
        if (page.getUrl().regex(URL_LIST).match()) {
            page.addTargetRequests(page.getHtml().links().regex(URL_POST).all());
            page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
            return;
        }

        // Article page.
        String title = Utils.replaceHTML(page.getHtml().xpath("//div[@class='pages-title']").toString());
        String content = Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/table[@id='printContent']/tbody/tr/td").toString());
        String source = Utils.replaceHTML(removeLabel(
                page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']/span[@class='font'][2]").toString(), "来源: "));
        String author = Utils.replaceHTML(removeLabel(
                page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/div[@class='editor']").toString(), "责任编辑: "));
        String create = Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']").toString());
        String url = page.getUrl().get();

        page.putField("title", title);
        page.putField("content", content);
        page.putField("source", source);
        page.putField("author", author);
        page.putField("create", create);
        page.putField("url", url);

        // Build the article and send it to the search index.
        Article article = Utils.createArticle(title, content, source, author, url, create);
        Utils.index(article);
    }

    public Site getSite() {
        return site;
    }

    /**
     * Removes a caption such as "来源: " from an extracted fragment.
     *
     * The fragment is allowed to be null (Utils.replaceHTML guards against a null
     * extraction result as well); calling replace() on it directly would throw a
     * NullPointerException for pages that lack the element.
     *
     * @param fragment extracted markup, possibly null
     * @param label caption to remove
     * @return the fragment without the caption, or null when the fragment is null
     */
    private static String removeLabel(String fragment, String label) {
        return fragment == null ? null : fragment.replace(label, "");
    }

    public static void main(String[] args) {

        ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml");
        JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline");
        Spider.create(new GovNewsPageProcesser())
                .addUrl("http://new.sousuo.gov.cn/column/19769/0.htm") // top news
                .addUrl("http://new.sousuo.gov.cn/column/16704/0.htm") // hot topics
                .addUrl("http://new.sousuo.gov.cn/column/16700/0.htm") // ministry news
                .addUrl("http://new.sousuo.gov.cn/column/16699/0.htm") // local reports
                .addUrl("http://new.sousuo.gov.cn/column/16697/0.htm") // law enforcement and supervision
                .addUrl("http://new.sousuo.gov.cn/column/19423/0.htm") // State Council information
                .addUrl("http://new.sousuo.gov.cn/column/16622/0.htm") // speeches
                .addUrl("http://new.sousuo.gov.cn/column/16623/0.htm") // meetings
                .addUrl("http://new.sousuo.gov.cn/column/16621/0.htm") // activities
                .addUrl("http://new.sousuo.gov.cn/column/16620/0.htm") // overseas visits
                .addUrl("http://new.sousuo.gov.cn/column/16740/0.htm") // special topics - latest
                .addUrl("http://new.sousuo.gov.cn/column/16739/0.htm") // special topics - focus
                .addUrl("http://new.sousuo.gov.cn/column/16743/0.htm") // events
                .addUrl("http://new.sousuo.gov.cn/column/16744/0.htm") // contingency plans
                .addUrl("http://new.sousuo.gov.cn/column/16742/0.htm") // work
                .addUrl("http://new.sousuo.gov.cn/column/16765/0.htm") // policy interpretation - experts
                .addUrl("http://new.sousuo.gov.cn/column/16764/0.htm") // policy interpretation - media
                .addUrl("http://new.sousuo.gov.cn/column/17999/0.htm") // commentary - key editorials
                .addUrl("http://new.sousuo.gov.cn/column/18000/0.htm") // commentary - current affairs
                .addUrl("http://new.sousuo.gov.cn/column/18001/0.htm") // commentary - online
                .addUrl("http://new.sousuo.gov.cn/column/16852/0.htm") // data news
                .addPipeline(jdbcPipeline) // store the crawled results in the database
                .thread(5)
                .run();
    }
}
-------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/JdbcPipeline.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import java.text.ParseException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | /** 19 | * Created by Administrator on 2015/9/10.
20 | */ 21 | @Component("jdbcPipeline") 22 | public class JdbcPipeline implements Pipeline { 23 | 24 | @Autowired 25 | ArticleDao articleDao; 26 | 27 | public void process(ResultItems resultItems, Task task) { 28 | Map items = resultItems.getAll(); 29 | if(resultItems!=null&&resultItems.getAll().size()>0){ 30 | Article article = new Article(); 31 | article.setTitle((String) items.get("title")); 32 | article.setContent((String) items.get("content")); 33 | article.setSource((String) items.get("source")); 34 | article.setAuthor((String) items.get("author")); 35 | article.setUrl((String)items.get("url")); 36 | String dataStr = (String)items.get("create"); 37 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 38 | Matcher matcher = pattern.matcher(dataStr); 39 | if(matcher.find()){ 40 | dataStr = matcher.group(0); 41 | } 42 | try { 43 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(dataStr)); 44 | } catch (ParseException e) { 45 | e.printStackTrace(); 46 | } 47 | articleDao.save(article); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/NeteaseNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/9 0009. 
13 | */ 14 | public class NeteaseNewsPageProcesser implements PageProcessor { 15 | 16 | private Site site = Site.me().setDomain("news.163.com") 17 | .setRetryTimes(3) 18 | .setSleepTime(1000) 19 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36"); 20 | 21 | public static final String URL_LIST = "http://news\\.163\\.com/special/\\w+/\\w+\\.html"; 22 | 23 | public static final String URL_POST = "http://news\\.163\\.com/.+\\.html"; 24 | 25 | public void process(Page page) { 26 | //列表页 27 | if (page.getUrl().regex(URL_LIST).match()||page.getUrl().regex("http://news\\.163\\.com/domestic").match()||page.getUrl().regex("http://news\\.163\\.com/shehui").match()) { 28 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 29 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 30 | }else{ 31 | 32 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//h1[@id='h1title']").toString())); 33 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@id='endText']").toString())); 34 | page.putField("create", Utils.replaceHTML(page.getHtml().xpath("//div[@class=\"ep-time-soure cDGray\"]").toString())); 35 | page.putField("source", Utils.replaceHTML(page.getHtml().xpath("//a[@id=\"ne_article_source\"]/text()").toString())); 36 | page.putField("url", page.getUrl().get()); 37 | 38 | String title = (String)page.getResultItems().get("title"); 39 | String content = (String)page.getResultItems().get("content"); 40 | String create = (String)page.getResultItems().get("create"); 41 | String source = (String)page.getResultItems().get("source"); 42 | String url = (String)page.getResultItems().get("url"); 43 | String author = ""; 44 | 45 | // 创建article 46 | Article article = Utils.createArticle(title, content, source, author, url, create); 47 | 48 | // 索引 49 | 50 | Utils.index(article); 51 | 52 | } 53 | } 54 | 55 | public Site getSite() { 56 | return 
site; 57 | } 58 | 59 | 60 | 61 | public static void main(String[] args) { 62 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 63 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 64 | Spider.create(new NeteaseNewsPageProcesser()) 65 | .addUrl("http://news.163.com/domestic") 66 | .addUrl("http://news.163.com/shehui") 67 | .addPipeline(jdbcPipeline) 68 | .thread(5) 69 | .run(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/Utils.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import net.aimeizi.client.elasticsearch.TransportClient; 5 | import net.aimeizi.model.Article; 6 | 7 | import java.text.SimpleDateFormat; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/11. 
13 | */ 14 | public class Utils { 15 | 16 | /** 17 | * 创建Article 18 | * @return 19 | */ 20 | public static Article createArticle(String title,String content,String source,String author,String url,String create){ 21 | Article article = new Article(); 22 | article.setTitle(title); 23 | article.setContent(content); 24 | article.setSource(source); 25 | article.setAuthor(author); 26 | try { 27 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 28 | Matcher matcher = pattern.matcher(create); 29 | if(matcher.find()){ 30 | create = matcher.group(0); 31 | } 32 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(create)); 33 | }catch (Exception e){ 34 | } 35 | article.setUrl(url); 36 | return article; 37 | } 38 | 39 | /** 40 | * 创建索引 41 | * @param article 42 | */ 43 | public static void index(Article article){ 44 | // 创建索引 45 | ObjectMapper mapper = new ObjectMapper(); 46 | try { 47 | TransportClient.createIndex("news", "article", mapper.writeValueAsString(article)); 48 | }catch (Exception e){ 49 | } 50 | } 51 | 52 | /** 53 | * html字符过滤 54 | * @param str 55 | * @return 56 | */ 57 | public static String replaceHTML(String str){ 58 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/resources/applicationContext.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/resources/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: elasticsearch -------------------------------------------------------------------------------- /src/main/resources/jdbc.properties: -------------------------------------------------------------------------------- 1 | 
jdbc.driverClassName=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/news 3 | jdbc.username=root 4 | jdbc.password=root -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=ERROR, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %c{1}:%L - %m%n 9 | 10 | # NodeChecker gets too chatty during test phase 11 | # log4j.category.io.searchbox.client.config.discovery.NodeChecker=FATAL 12 | 13 | # Same as above, gets too chatty for regular builds 14 | log4j.category.org.elasticsearch=WARN -------------------------------------------------------------------------------- /src/main/resources/sql.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `news` DEFAULT CHARACTER SET utf8; 2 | 3 | USE `news`; 4 | 5 | DROP TABLE IF EXISTS `news`; 6 | 7 | CREATE TABLE `news` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `title` varchar(100) DEFAULT NULL, 10 | `content` mediumtext, 11 | `url` varchar(100) DEFAULT NULL, 12 | `source` varchar(50) DEFAULT NULL, 13 | `author` varchar(50) DEFAULT NULL, 14 | `pubdate` datetime DEFAULT NULL, 15 | PRIMARY KEY (`id`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=112 DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/mvc-dispatcher-servlet.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
-------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/pages/search.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="utf-8" %> 2 | <%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt" %> 3 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> 4 | 5 | 6 | 7 | 8 | elasticsearch-jest 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 53 | 54 | 55 | 59 | 60 | 62 | 63 | 64 | 66 | 67 | 68 | 70 | 71 | 72 | 74 | 75 | 76 | 78 | 79 | 80 | 82 | 83 | 84 | 93 | 95 | 106 | 117 | 121 | 125 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | selected>全部 148 | selected>标题 149 | selected>内容 150 | selected>来源 151 | selected>作者 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | selected>升序 160 | selected>降序 161 | 162 | 163 | 164 | 165 | selected>10 166 | selected>20 167 | selected>50 168 | selected>100 169 | 170 | 171 | 172 | 搜索 173 | 174 | 175 | 176 | 177 | 178 | 搜索关键字${queryString}共检索到${count}条记录,共${totalPages}页,每页显示${pageSize}条。共耗时${took}毫秒。 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 没有要查询的内容 187 | 188 | 189 | 190 | ${article.title} 191 | ${article.content} 192 | 来源:${article.source} 编辑:${article.author} 发表日期: 193 | 194 | 195 | 196 | 197 | 198 | 199 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | elasticsearch-jest-example 7 | 8 | contextConfigLocation 9 | 10 | /WEB-INF/mvc-dispatcher-servlet.xml 11 | 12 | 13 | 14 | 15 | org.springframework.web.context.ContextLoaderListener 16 | 17 | 18 | CharacterEncodingFilter 19 | org.springframework.web.filter.CharacterEncodingFilter 20 | 21 | 
encoding 22 | UTF-8 23 | 24 | 25 | 26 | CharacterEncodingFilter 27 | /* 28 | 29 | 30 | 31 | mvc-dispatcher 32 | org.springframework.web.servlet.DispatcherServlet 33 | 34 | contextConfigLocation 35 | /WEB-INF/mvc-dispatcher-servlet.xml 36 | 37 | 1 38 | 39 | 40 | 41 | mvc-dispatcher 42 | / 43 | 44 | -------------------------------------------------------------------------------- /src/main/webapp/css/pager.css: -------------------------------------------------------------------------------- 1 | #pageNav{padding-left:386px;} 2 | #pageNav ul.pages { 3 | display:block; 4 | border:none; 5 | text-transform:uppercase; 6 | font-size:12px; 7 | margin:10px 0 50px; 8 | padding:0; 9 | } 10 | #pageNav ul.pages li { 11 | list-style:none; 12 | float:left; 13 | border:1px solid #9AAFE5; 14 | text-decoration:none; 15 | margin:0 5px 0 0; 16 | padding:5px; 17 | } 18 | #pageNav ul.pages li:hover { 19 | border:1px solid #003f7e; 20 | } 21 | #pageNav ul.pages li.pgEmpty { 22 | 23 | } 24 | #pageNav ul.pages li.pgCurrent { 25 | border:none; 26 | /*border:1px solid #003f7e;*/ 27 | color:#000; 28 | font-weight:700; 29 | /*background-color:#eee;*/ 30 | background: none repeat scroll 0 0 #2E6AB1; 31 | border: 1px solid #2E6AB1; 32 | color: #FFFFFF; 33 | font-weight: bold; 34 | } -------------------------------------------------------------------------------- /src/main/webapp/images/top.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v5tech/elasticsearch-jest-example/041008b96f219e5b9bad4d8a429faa44aa862cab/src/main/webapp/images/top.png -------------------------------------------------------------------------------- /src/main/webapp/js/jquery.pager.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery pager plugin 3 | * Version 1.0 (12/22/2008) 4 | * @requires jQuery v1.2.6 or later 5 | * 6 | * Example at: http://jonpauldavies.github.com/JQuery/Pager/PagerDemo.html 7 | * 8 
| Copyright (c) 2008-2009 Jon Paul Davies 9 | * Dual licensed under the MIT and GPL licenses: 10 | * http://www.opensource.org/licenses/mit-license.php 11 | * http://www.gnu.org/licenses/gpl.html 12 | * 13 | * Read the related blog post and contact the author at http://www.j-dee.com/2008/12/22/jquery-pager-plugin/ 14 | * 15 | * This version is far from perfect and doesn't manage its own state, therefore contributions are more than welcome! 16 | * 17 | * Usage: .pager({ pagenumber: 1, pagecount: 15, buttonClickCallback: PagerClickTest }); 18 | * 19 | * Where pagenumber is the visible page number 20 | * pagecount is the total number of pages to display 21 | * buttonClickCallback is the method to fire when a pager button is clicked. 22 | * 23 | * buttonClickCallback signature is PagerClickTest = function(pageclickednumber) 24 | * Where pageclickednumber is the number of the page clicked in the control. 25 | * 26 | * The included Pager.CSS file is a dependency but can obviously be tweaked to your wishes 27 | * Tested in IE6 IE7 Firefox & Safari. Any browser strangeness, please report. 
28 | */ 29 | (function($) { 30 | 31 | $.fn.pager = function(options) { 32 | 33 | var opts = $.extend({}, $.fn.pager.defaults, options); 34 | 35 | return this.each(function() { 36 | 37 | // empty out the destination element and then render out the pager with the supplied options 38 | $(this).empty().append(renderpager(parseInt(options.pagenumber), parseInt(options.pagecount), options.buttonClickCallback)); 39 | 40 | // specify correct cursor activity 41 | $('.pages li').mouseover(function() { document.body.style.cursor = "pointer"; }).mouseout(function() { document.body.style.cursor = "auto"; }); 42 | }); 43 | }; 44 | 45 | // render and return the pager with the supplied options 46 | function renderpager(pagenumber, pagecount, buttonClickCallback) { 47 | 48 | // setup $pager to hold render 49 | var $pager = $(''); 50 | 51 | // add in the previous and next buttons 52 | $pager.append(renderButton('首页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('上一页', pagenumber, pagecount, buttonClickCallback)); 53 | 54 | // pager currently only handles 10 viewable pages ( could be easily parameterized, maybe in next version ) so handle edge cases 55 | var startPoint = 1; 56 | var endPoint = 9; 57 | 58 | if (pagenumber > 4) { 59 | startPoint = pagenumber - 4; 60 | endPoint = pagenumber + 4; 61 | } 62 | 63 | if (endPoint > pagecount) { 64 | startPoint = pagecount - 8; 65 | endPoint = pagecount; 66 | } 67 | 68 | if (startPoint < 1) { 69 | startPoint = 1; 70 | } 71 | 72 | // loop thru visible pages and render buttons 73 | for (var page = startPoint; page <= endPoint; page++) { 74 | 75 | var currentButton = $('' + (page) + ''); 76 | 77 | page == pagenumber ? currentButton.addClass('pgCurrent') : currentButton.click(function() { buttonClickCallback(this.firstChild.data); }); 78 | currentButton.appendTo($pager); 79 | } 80 | 81 | // render in the next and last buttons before returning the whole rendered control back. 
82 | $pager.append(renderButton('下一页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('末页', pagenumber, pagecount, buttonClickCallback)); 83 | 84 | return $pager; 85 | } 86 | 87 | // renders and returns a 'specialized' button, ie 'next', 'previous' etc. rather than a page number button 88 | function renderButton(buttonLabel, pagenumber, pagecount, buttonClickCallback) { 89 | 90 | var $Button = $('' + buttonLabel + ''); 91 | 92 | var destPage = 1; 93 | 94 | // work out destination page for required button type 95 | switch (buttonLabel) { 96 | case "首页": 97 | destPage = 1; 98 | break; 99 | case "上一页": 100 | destPage = pagenumber - 1; 101 | break; 102 | case "下一页": 103 | destPage = pagenumber + 1; 104 | break; 105 | case "末页": 106 | destPage = pagecount; 107 | break; 108 | } 109 | 110 | // disable and 'grey' out buttons if not needed. 111 | if (buttonLabel == "首页" || buttonLabel == "上一页") { 112 | pagenumber <= 1 ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 113 | } 114 | else { 115 | pagenumber >= pagecount ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 116 | } 117 | 118 | return $Button; 119 | } 120 | 121 | // pager defaults. 
hardly worth bothering with in this case but used as placeholder for expansion in the next version 122 | $.fn.pager.defaults = { 123 | pagenumber: 1, 124 | pagecount: 1 125 | }; 126 | 127 | })(jQuery); 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /src/main/webapp/js/top.js: -------------------------------------------------------------------------------- 1 | function goTop(acceleration, time) 2 | { 3 | acceleration = acceleration || 0.1; 4 | time = time || 16; 5 | 6 | var x1 = 0; 7 | var y1 = 0; 8 | var x2 = 0; 9 | var y2 = 0; 10 | var x3 = 0; 11 | var y3 = 0; 12 | 13 | if (document.documentElement) 14 | { 15 | x1 = document.documentElement.scrollLeft || 0; 16 | y1 = document.documentElement.scrollTop || 0; 17 | } 18 | if (document.body) 19 | { 20 | x2 = document.body.scrollLeft || 0; 21 | y2 = document.body.scrollTop || 0; 22 | } 23 | var x3 = window.scrollX || 0; 24 | var y3 = window.scrollY || 0; 25 | 26 | var x = Math.max(x1, Math.max(x2, x3)); 27 | var y = Math.max(y1, Math.max(y2, y3)); 28 | 29 | var speed = 1 + acceleration; 30 | window.scrollTo(Math.floor(x / speed), Math.floor(y / speed)); 31 | 32 | if(x > 0 || y > 0) 33 | { 34 | var invokeFunction = "goTop(" + acceleration + ", " + time + ")"; 35 | window.setTimeout(invokeFunction, time); 36 | } 37 | } --------------------------------------------------------------------------------
articles, SearchResult result) throws ParseException { 138 | JsonObject jsonObject = result.getJsonObject(); 139 | long took = jsonObject.get("took").getAsLong(); 140 | JsonObject hitsobject = jsonObject.getAsJsonObject("hits"); 141 | long total = hitsobject.get("total").getAsLong(); 142 | JsonArray jsonArray = hitsobject.getAsJsonArray("hits"); 143 | 144 | for (int i = 0; i < jsonArray.size(); i++) { 145 | JsonObject jsonHitsObject = jsonArray.get(i).getAsJsonObject(); 146 | 147 | // 获取返回字段 148 | JsonObject sourceObject = jsonHitsObject.get("_source").getAsJsonObject(); 149 | 150 | // 封装Article对象 151 | Article article = new Article(); 152 | article.setTitle(sourceObject.get("title").getAsString()); 153 | article.setContent(sourceObject.get("content").getAsString()); 154 | article.setSource(sourceObject.get("source").getAsString()); 155 | article.setAuthor(sourceObject.get("author").getAsString()); 156 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); 157 | article.setPubdate(sdf.parse(sourceObject.get("pubdate").getAsString())); 158 | article.setUrl(sourceObject.get("url").getAsString()); 159 | 160 | // 获取高亮字段 161 | JsonObject highlightObject = jsonHitsObject.get("highlight").getAsJsonObject(); 162 | String content = null; 163 | String title = null; 164 | String source = null; 165 | String author = null; 166 | if (highlightObject.get("content") != null) { 167 | content = highlightObject.get("content").getAsJsonArray().get(0).getAsString(); 168 | } 169 | if (highlightObject.get("title") != null) { 170 | title = highlightObject.get("title").getAsJsonArray().get(0).getAsString(); 171 | } 172 | if (highlightObject.get("source") != null) { 173 | source = highlightObject.get("source").getAsJsonArray().get(0).getAsString(); 174 | } 175 | if (highlightObject.get("author") != null) { 176 | author = highlightObject.get("author").getAsJsonArray().get(0).getAsString(); 177 | } 178 | if (content != null) { 179 | article.setContent(content); 180 | } 181 | 
if (title != null) { 182 | article.setTitle(title); 183 | } 184 | if (source != null) { 185 | article.setSource(source); 186 | } 187 | if (author != null) { 188 | article.setAuthor(author); 189 | } 190 | articles.add(article); 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/GovNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * 爬取中央政府网要闻、热点、部门新闻、地方报道、执法监管等新闻信息 13 | * Created by Administrator on 2015/9/10. 14 | */ 15 | public class GovNewsPageProcesser implements PageProcessor { 16 | 17 | // 新闻列表 18 | private final static String URL_LIST = "http://new\\.sousuo\\.gov\\.cn\\/column\\/\\d+\\/\\d+\\.htm"; 19 | 20 | // 新闻列表页正文url 21 | private final static String URL_POST = "http://www\\.gov\\.cn/[\\w/-]+\\.htm"; 22 | 23 | private Site site = Site.me() 24 | .setRetryTimes(3) 25 | .setSleepTime(1000) 26 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"); 27 | 28 | public void process(Page page) { 29 | // 列表页 30 | if(page.getUrl().regex(URL_LIST).match()){ 31 | // 添加详情页请求链接 32 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 33 | // 添加列表页请求链接 34 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 35 | }else{// 详情页 36 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//div[@class='pages-title']").toString())); 37 | page.putField("content", 
Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/table[@id='printContent']/tbody/tr/td").toString())); 38 | page.putField("source",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']/span[@class='font'][2]").toString().replace("来源: ", ""))); 39 | page.putField("author",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages_content']/div[@class='editor']").toString().replace("责任编辑: ", ""))); 40 | page.putField("create",Utils.replaceHTML(page.getHtml().xpath("//div[@class='article-colum']/div[@class='pages-date']").toString())); 41 | page.putField("url",page.getUrl().get()); 42 | 43 | String title = (String)page.getResultItems().get("title"); 44 | String content = (String)page.getResultItems().get("content"); 45 | String create = (String)page.getResultItems().get("create"); 46 | String source = (String)page.getResultItems().get("source"); 47 | String url = (String)page.getResultItems().get("url"); 48 | String author = (String)page.getResultItems().get("author"); 49 | 50 | // 创建article 51 | Article article = Utils.createArticle(title, content, source, author, url, create); 52 | 53 | // 索引 54 | 55 | Utils.index(article); 56 | 57 | } 58 | } 59 | 60 | public Site getSite() { 61 | return site; 62 | } 63 | 64 | /** 65 | * html字符过滤 66 | * @param str 67 | * @return 68 | */ 69 | private static String replaceHTML(String str){ 70 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 71 | } 72 | 73 | public static void main(String[] args) { 74 | 75 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 76 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 77 | Spider.create(new GovNewsPageProcesser()) 78 | .addUrl("http://new.sousuo.gov.cn/column/19769/0.htm") //要闻 79 | .addUrl("http://new.sousuo.gov.cn/column/16704/0.htm") //热点 80 | 
.addUrl("http://new.sousuo.gov.cn/column/16700/0.htm") //部门新闻 81 | .addUrl("http://new.sousuo.gov.cn/column/16699/0.htm") //地方报道 82 | .addUrl("http://new.sousuo.gov.cn/column/16697/0.htm") //执法监管 83 | .addUrl("http://new.sousuo.gov.cn/column/19423/0.htm") //国务院信息 84 | .addUrl("http://new.sousuo.gov.cn/column/16622/0.htm") //讲话 85 | .addUrl("http://new.sousuo.gov.cn/column/16623/0.htm") //会议 86 | .addUrl("http://new.sousuo.gov.cn/column/16621/0.htm") //活动 87 | .addUrl("http://new.sousuo.gov.cn/column/16620/0.htm") //出访 88 | .addUrl("http://new.sousuo.gov.cn/column/16740/0.htm") //专题信息-最新 89 | .addUrl("http://new.sousuo.gov.cn/column/16739/0.htm") //专题信息-聚焦 90 | .addUrl("http://new.sousuo.gov.cn/column/16743/0.htm") //事件 91 | .addUrl("http://new.sousuo.gov.cn/column/16744/0.htm") //预案 92 | .addUrl("http://new.sousuo.gov.cn/column/16742/0.htm") //工作 93 | .addUrl("http://new.sousuo.gov.cn/column/16765/0.htm") //政策法规解读-专家 94 | .addUrl("http://new.sousuo.gov.cn/column/16764/0.htm") //政策法规解读-媒体 95 | .addUrl("http://new.sousuo.gov.cn/column/17999/0.htm") //评论-要论 96 | .addUrl("http://new.sousuo.gov.cn/column/18000/0.htm") //评论-时评 97 | .addUrl("http://new.sousuo.gov.cn/column/18001/0.htm") //评论-网评 98 | .addUrl("http://new.sousuo.gov.cn/column/16852/0.htm") //数据要闻 99 | .addPipeline(jdbcPipeline) // 将抓取到的结果保存到数据库 100 | .thread(5) 101 | .run(); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/JdbcPipeline.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import 
java.text.ParseException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | /** 19 | * Created by Administrator on 2015/9/10. 20 | */ 21 | @Component("jdbcPipeline") 22 | public class JdbcPipeline implements Pipeline { 23 | 24 | @Autowired 25 | ArticleDao articleDao; 26 | 27 | public void process(ResultItems resultItems, Task task) { 28 | Map items = resultItems.getAll(); 29 | if(resultItems!=null&&resultItems.getAll().size()>0){ 30 | Article article = new Article(); 31 | article.setTitle((String) items.get("title")); 32 | article.setContent((String) items.get("content")); 33 | article.setSource((String) items.get("source")); 34 | article.setAuthor((String) items.get("author")); 35 | article.setUrl((String)items.get("url")); 36 | String dataStr = (String)items.get("create"); 37 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 38 | Matcher matcher = pattern.matcher(dataStr); 39 | if(matcher.find()){ 40 | dataStr = matcher.group(0); 41 | } 42 | try { 43 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(dataStr)); 44 | } catch (ParseException e) { 45 | e.printStackTrace(); 46 | } 47 | articleDao.save(article); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/NeteaseNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * Created by Administrator on 
2015/9/9 0009. 13 | */ 14 | public class NeteaseNewsPageProcesser implements PageProcessor { 15 | 16 | private Site site = Site.me().setDomain("news.163.com") 17 | .setRetryTimes(3) 18 | .setSleepTime(1000) 19 | .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36"); 20 | 21 | public static final String URL_LIST = "http://news\\.163\\.com/special/\\w+/\\w+\\.html"; 22 | 23 | public static final String URL_POST = "http://news\\.163\\.com/.+\\.html"; 24 | 25 | public void process(Page page) { 26 | //列表页 27 | if (page.getUrl().regex(URL_LIST).match()||page.getUrl().regex("http://news\\.163\\.com/domestic").match()||page.getUrl().regex("http://news\\.163\\.com/shehui").match()) { 28 | page.addTargetRequests(page.getHtml().links().regex(URL_POST).all()); 29 | page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all()); 30 | }else{ 31 | 32 | page.putField("title", Utils.replaceHTML(page.getHtml().xpath("//h1[@id='h1title']").toString())); 33 | page.putField("content", Utils.replaceHTML(page.getHtml().xpath("//div[@id='endText']").toString())); 34 | page.putField("create", Utils.replaceHTML(page.getHtml().xpath("//div[@class=\"ep-time-soure cDGray\"]").toString())); 35 | page.putField("source", Utils.replaceHTML(page.getHtml().xpath("//a[@id=\"ne_article_source\"]/text()").toString())); 36 | page.putField("url", page.getUrl().get()); 37 | 38 | String title = (String)page.getResultItems().get("title"); 39 | String content = (String)page.getResultItems().get("content"); 40 | String create = (String)page.getResultItems().get("create"); 41 | String source = (String)page.getResultItems().get("source"); 42 | String url = (String)page.getResultItems().get("url"); 43 | String author = ""; 44 | 45 | // 创建article 46 | Article article = Utils.createArticle(title, content, source, author, url, create); 47 | 48 | // 索引 49 | 50 | Utils.index(article); 51 | 52 | } 53 | } 54 | 55 | public Site getSite() { 
56 | return site; 57 | } 58 | 59 | 60 | 61 | public static void main(String[] args) { 62 | ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml"); 63 | JdbcPipeline jdbcPipeline = (JdbcPipeline)applicationContext.getBean("jdbcPipeline"); 64 | Spider.create(new NeteaseNewsPageProcesser()) 65 | .addUrl("http://news.163.com/domestic") 66 | .addUrl("http://news.163.com/shehui") 67 | .addPipeline(jdbcPipeline) 68 | .thread(5) 69 | .run(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/Utils.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import net.aimeizi.client.elasticsearch.TransportClient; 5 | import net.aimeizi.model.Article; 6 | 7 | import java.text.SimpleDateFormat; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/11. 
13 | */ 14 | public class Utils { 15 | 16 | /** 17 | * 创建Article 18 | * @return 19 | */ 20 | public static Article createArticle(String title,String content,String source,String author,String url,String create){ 21 | Article article = new Article(); 22 | article.setTitle(title); 23 | article.setContent(content); 24 | article.setSource(source); 25 | article.setAuthor(author); 26 | try { 27 | Pattern pattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}"); 28 | Matcher matcher = pattern.matcher(create); 29 | if(matcher.find()){ 30 | create = matcher.group(0); 31 | } 32 | article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(create)); 33 | }catch (Exception e){ 34 | } 35 | article.setUrl(url); 36 | return article; 37 | } 38 | 39 | /** 40 | * 创建索引 41 | * @param article 42 | */ 43 | public static void index(Article article){ 44 | // 创建索引 45 | ObjectMapper mapper = new ObjectMapper(); 46 | try { 47 | TransportClient.createIndex("news", "article", mapper.writeValueAsString(article)); 48 | }catch (Exception e){ 49 | } 50 | } 51 | 52 | /** 53 | * html字符过滤 54 | * @param str 55 | * @return 56 | */ 57 | public static String replaceHTML(String str){ 58 | return str!=null?str.replaceAll("\\<.*?>","").replaceAll(" ",""):""; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/resources/applicationContext.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/resources/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: elasticsearch -------------------------------------------------------------------------------- /src/main/resources/jdbc.properties: -------------------------------------------------------------------------------- 1 | 
jdbc.driverClassName=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/news 3 | jdbc.username=root 4 | jdbc.password=root -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=ERROR, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %c{1}:%L - %m%n 9 | 10 | # NodeChecker gets too chatty during test phase 11 | # log4j.category.io.searchbox.client.config.discovery.NodeChecker=FATAL 12 | 13 | # Same as above, gets too chatty for regular builds 14 | log4j.category.org.elasticsearch=WARN -------------------------------------------------------------------------------- /src/main/resources/sql.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `news` DEFAULT CHARACTER SET utf8; 2 | 3 | USE `news`; 4 | 5 | DROP TABLE IF EXISTS `news`; 6 | 7 | CREATE TABLE `news` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `title` varchar(100) DEFAULT NULL, 10 | `content` mediumtext, 11 | `url` varchar(100) DEFAULT NULL, 12 | `source` varchar(50) DEFAULT NULL, 13 | `author` varchar(50) DEFAULT NULL, 14 | `pubdate` datetime DEFAULT NULL, 15 | PRIMARY KEY (`id`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=112 DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/mvc-dispatcher-servlet.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
-------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/pages/search.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="utf-8" %> 2 | <%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt" %> 3 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> 4 | 5 | 6 | 7 | 8 | elasticsearch-jest 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 53 | 54 | 55 | 59 | 60 | 62 | 63 | 64 | 66 | 67 | 68 | 70 | 71 | 72 | 74 | 75 | 76 | 78 | 79 | 80 | 82 | 83 | 84 | 93 | 95 | 106 | 117 | 121 | 125 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | selected>全部 148 | selected>标题 149 | selected>内容 150 | selected>来源 151 | selected>作者 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | selected>升序 160 | selected>降序 161 | 162 | 163 | 164 | 165 | selected>10 166 | selected>20 167 | selected>50 168 | selected>100 169 | 170 | 171 | 172 | 搜索 173 | 174 | 175 | 176 | 177 | 178 | 搜索关键字${queryString}共检索到${count}条记录,共${totalPages}页,每页显示${pageSize}条。共耗时${took}毫秒。 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 没有要查询的内容 187 | 188 | 189 | 190 | ${article.title} 191 | ${article.content} 192 | 来源:${article.source} 编辑:${article.author} 发表日期: 193 | 194 | 195 | 196 | 197 | 198 | 199 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | elasticsearch-jest-example 7 | 8 | contextConfigLocation 9 | 10 | /WEB-INF/mvc-dispatcher-servlet.xml 11 | 12 | 13 | 14 | 15 | org.springframework.web.context.ContextLoaderListener 16 | 17 | 18 | CharacterEncodingFilter 19 | org.springframework.web.filter.CharacterEncodingFilter 20 | 21 | 
encoding 22 | UTF-8 23 | 24 | 25 | 26 | CharacterEncodingFilter 27 | /* 28 | 29 | 30 | 31 | mvc-dispatcher 32 | org.springframework.web.servlet.DispatcherServlet 33 | 34 | contextConfigLocation 35 | /WEB-INF/mvc-dispatcher-servlet.xml 36 | 37 | 1 38 | 39 | 40 | 41 | mvc-dispatcher 42 | / 43 | 44 | -------------------------------------------------------------------------------- /src/main/webapp/css/pager.css: -------------------------------------------------------------------------------- 1 | #pageNav{padding-left:386px;} 2 | #pageNav ul.pages { 3 | display:block; 4 | border:none; 5 | text-transform:uppercase; 6 | font-size:12px; 7 | margin:10px 0 50px; 8 | padding:0; 9 | } 10 | #pageNav ul.pages li { 11 | list-style:none; 12 | float:left; 13 | border:1px solid #9AAFE5; 14 | text-decoration:none; 15 | margin:0 5px 0 0; 16 | padding:5px; 17 | } 18 | #pageNav ul.pages li:hover { 19 | border:1px solid #003f7e; 20 | } 21 | #pageNav ul.pages li.pgEmpty { 22 | 23 | } 24 | #pageNav ul.pages li.pgCurrent { 25 | border:none; 26 | /*border:1px solid #003f7e;*/ 27 | color:#000; 28 | font-weight:700; 29 | /*background-color:#eee;*/ 30 | background: none repeat scroll 0 0 #2E6AB1; 31 | border: 1px solid #2E6AB1; 32 | color: #FFFFFF; 33 | font-weight: bold; 34 | } -------------------------------------------------------------------------------- /src/main/webapp/images/top.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v5tech/elasticsearch-jest-example/041008b96f219e5b9bad4d8a429faa44aa862cab/src/main/webapp/images/top.png -------------------------------------------------------------------------------- /src/main/webapp/js/jquery.pager.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery pager plugin 3 | * Version 1.0 (12/22/2008) 4 | * @requires jQuery v1.2.6 or later 5 | * 6 | * Example at: http://jonpauldavies.github.com/JQuery/Pager/PagerDemo.html 7 | * 8 
| * Copyright (c) 2008-2009 Jon Paul Davies 9 | * Dual licensed under the MIT and GPL licenses: 10 | * http://www.opensource.org/licenses/mit-license.php 11 | * http://www.gnu.org/licenses/gpl.html 12 | * 13 | * Read the related blog post and contact the author at http://www.j-dee.com/2008/12/22/jquery-pager-plugin/ 14 | * 15 | * This version is far from perfect and doesn't manage its own state, therefore contributions are more than welcome! 16 | * 17 | * Usage: .pager({ pagenumber: 1, pagecount: 15, buttonClickCallback: PagerClickTest }); 18 | * 19 | * Where pagenumber is the visible page number 20 | * pagecount is the total number of pages to display 21 | * buttonClickCallback is the method to fire when a pager button is clicked. 22 | * 23 | * buttonClickCallback signature is PagerClickTest = function(pageclickednumber) 24 | * Where pageclickednumber is the number of the page clicked in the control. 25 | * 26 | * The included Pager.CSS file is a dependency but can obviously be tweaked to your wishes 27 | * Tested in IE6 IE7 Firefox & Safari. Any browser strangeness, please report. 
/**
 * jQuery plugin entry point: replaces the content of every matched element with a pager.
 *
 * @param {Object} options
 * @param {number|string} options.pagenumber  currently visible page (falls back to the plugin default)
 * @param {number|string} options.pagecount   total number of pages (falls back to the plugin default)
 * @param {function} options.buttonClickCallback  called with the destination page when a button is clicked
 * @returns {jQuery} the original selection, for chaining
 */
$.fn.pager = function(options) {

    // Read from the merged options so the defaults apply and a missing argument does not throw.
    var opts = $.extend({}, $.fn.pager.defaults, options);
    var pagenumber = parseInt(opts.pagenumber, 10);
    var pagecount = parseInt(opts.pagecount, 10);

    return this.each(function() {

        // empty out the destination element and then render out the pager with the supplied options
        var $pager = renderpager(pagenumber, pagecount, opts.buttonClickCallback);
        $(this).empty().append($pager);

        // pointer cursor while hovering a button; bound only to the buttons just rendered
        $pager.find('li')
            .mouseover(function() { document.body.style.cursor = "pointer"; })
            .mouseout(function() { document.body.style.cursor = "auto"; });
    });
};

/**
 * Builds the pager list: first/previous buttons, up to nine page-number buttons
 * centred on the current page, then next/last buttons.
 *
 * @param {number} pagenumber  currently visible page
 * @param {number} pagecount   total number of pages
 * @param {function} buttonClickCallback  click handler receiving the destination page
 * @returns {jQuery} the rendered list element
 */
function renderpager(pagenumber, pagecount, buttonClickCallback) {

    // An actual list element is required: $('') is an empty set and silently drops every append.
    var $pager = $('<ul class="pages"></ul>');

    // add in the first and previous buttons
    $pager.append(renderButton('首页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('上一页', pagenumber, pagecount, buttonClickCallback));

    // the pager shows a window of at most nine pages; handle the edge cases
    var startPoint = 1;
    var endPoint = 9;

    if (pagenumber > 4) {
        startPoint = pagenumber - 4;
        endPoint = pagenumber + 4;
    }

    if (endPoint > pagecount) {
        startPoint = pagecount - 8;
        endPoint = pagecount;
    }

    if (startPoint < 1) {
        startPoint = 1;
    }

    // loop through the visible pages and render the buttons
    for (var page = startPoint; page <= endPoint; page++) {

        var currentButton = $('<li class="page-number"></li>').text(page);

        if (page == pagenumber) {
            currentButton.addClass('pgCurrent');
        } else {
            // the button's text node holds its page number
            currentButton.click(function() { buttonClickCallback(this.firstChild.data); });
        }
        currentButton.appendTo($pager);
    }

    // render in the next and last buttons before returning the whole rendered control back.
    $pager.append(renderButton('下一页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('末页', pagenumber, pagecount, buttonClickCallback));

    return $pager;
}

/**
 * Renders one navigation button (first / previous / next / last) rather than a
 * page-number button.
 *
 * @param {string} buttonLabel  one of '首页', '上一页', '下一页', '末页'
 * @param {number} pagenumber   currently visible page
 * @param {number} pagecount    total number of pages
 * @param {function} buttonClickCallback  click handler receiving the destination page
 * @returns {jQuery} the rendered button; marked 'pgEmpty' and not clickable when it would not move
 */
function renderButton(buttonLabel, pagenumber, pagecount, buttonClickCallback) {

    var $Button = $('<li class="pgNext"></li>').text(buttonLabel);

    var destPage = 1;

    // work out destination page for required button type
    switch (buttonLabel) {
        case "首页":
            destPage = 1;
            break;
        case "上一页":
            destPage = pagenumber - 1;
            break;
        case "下一页":
            destPage = pagenumber + 1;
            break;
        case "末页":
            destPage = pagecount;
            break;
    }

    // disable and 'grey' out buttons if not needed.
    var backwards = (buttonLabel == "首页" || buttonLabel == "上一页");
    var disabled = backwards ? pagenumber <= 1 : pagenumber >= pagecount;
    if (disabled) {
        $Button.addClass('pgEmpty');
    } else {
        $Button.click(function() { buttonClickCallback(destPage); });
    }

    return $Button;
}
/**
 * Scrolls the window back towards the top-left corner, easing out: each step divides the
 * current scroll offsets by (1 + acceleration) and re-schedules itself until both are 0.
 *
 * @param {number} [acceleration=0.1]  easing factor; larger values scroll faster
 * @param {number} [time=16]           delay between steps in milliseconds
 */
function goTop(acceleration, time)
{
    acceleration = acceleration || 0.1;
    time = time || 16;

    var x1 = 0;
    var y1 = 0;
    var x2 = 0;
    var y2 = 0;

    // Read the scroll offset from every place it may be reported and use the largest.
    if (document.documentElement)
    {
        x1 = document.documentElement.scrollLeft || 0;
        y1 = document.documentElement.scrollTop || 0;
    }
    if (document.body)
    {
        x2 = document.body.scrollLeft || 0;
        y2 = document.body.scrollTop || 0;
    }
    var x3 = window.scrollX || 0;
    var y3 = window.scrollY || 0;

    var x = Math.max(x1, Math.max(x2, x3));
    var y = Math.max(y1, Math.max(y2, y3));

    var speed = 1 + acceleration;
    window.scrollTo(Math.floor(x / speed), Math.floor(y / speed));

    if(x > 0 || y > 0)
    {
        // Pass a function rather than a code string so nothing is evaluated at run time.
        window.setTimeout(function() { goTop(acceleration, time); }, time);
    }
}