├── .gitignore ├── LICENSE ├── README.md ├── elasticsearch快速上手.md ├── news.bat ├── pom.xml └── src └── main ├── java └── net │ └── aimeizi │ ├── client │ ├── elasticsearch │ │ ├── NodeClient.java │ │ └── TransportClient.java │ └── jest │ │ └── JestExample.java │ ├── controller │ └── ArticleController.java │ ├── dao │ ├── ArticleDao.java │ └── impl │ │ └── ArticleDaoImpl.java │ ├── model │ └── Article.java │ ├── service │ ├── ArticleService.java │ └── impl │ │ └── ArticleServiceImpl.java │ └── webmagic │ ├── GovNewsPageProcesser.java │ ├── JdbcPipeline.java │ ├── NeteaseNewsPageProcesser.java │ └── Utils.java ├── resources ├── applicationContext.xml ├── elasticsearch.yml ├── jdbc.properties ├── log4j.properties └── sql.sql └── webapp ├── WEB-INF ├── mvc-dispatcher-servlet.xml ├── pages │ └── search.jsp └── web.xml ├── css └── pager.css ├── images └── top.png └── js ├── jquery.pager.js └── top.js /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | .classpath 3 | .project 4 | .settings 5 | target 6 | 7 | # Mobile Tools for Java (J2ME) 8 | .mtj.tmp/ 9 | 10 | # Package Files # 11 | *.jar 12 | *.war 13 | *.ear 14 | 15 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 16 | hs_err_pid* 17 | /.idea 18 | /*.iml 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 雪山飞鹄 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the 
following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | jest 2 | ==== 3 | 4 | ElasticSearch Java Rest Client Examples 5 | 6 | ### 高亮查询(highlight) 7 | 8 | ``` 9 | POST http://127.0.0.1:9200/news/_search?q=李克强 10 | { 11 | "query" : { 12 | match_all:{} 13 | }, 14 | "highlight" : { 15 | "pre_tags" : ["", "", ""], 16 | "post_tags" : ["", "", ""], 17 | "fields" : [ 18 | {"title" : {}}, 19 | {"content" : { 20 | "fragment_size" : 350, 21 | "number_of_fragments" : 3, 22 | "no_match_size": 150 23 | }} 24 | ] 25 | } 26 | } 27 | ``` 28 | 29 | ``` 30 | POST http://127.0.0.1:9200/news/_search?q=李克强 31 | { 32 | "query" : { 33 | match_all:{} 34 | }, 35 | "highlight" : { 36 | "pre_tags" : [""], 37 | "post_tags" : [""], 38 | "fields" : [ 39 | {"title" : {}}, 40 | {"content" : { 41 | "fragment_size" : 350, 42 | "number_of_fragments" : 3, 43 | "no_match_size": 150 44 | }} 45 | ] 46 | } 47 | } 48 | ``` 49 | 50 | ### 删除索引 51 | 52 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-delete-index.html 53 | 54 | ``` 55 | DELETE http://127.0.0.1:9200/news 56 | ``` 57 | 58 | ### 创建索引 59 | 60 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html 61 | 62 
| ``` 63 | PUT http://127.0.0.1:9200/news 64 | ``` 65 | 66 | ### 创建或修改mapping 67 | 68 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-put-mapping.html 69 | 70 | ``` 71 | PUT /{index}/_mapping/{type} 72 | ``` 73 | 74 | ``` 75 | PUT http://127.0.0.1:9200/news/_mapping/article 76 | { 77 | "article": { 78 | "properties": { 79 | "pubdate": { 80 | "type": "date", 81 | "format": "dateOptionalTime" 82 | }, 83 | "author": { 84 | "type": "string" 85 | }, 86 | "content": { 87 | "type": "string" 88 | }, 89 | "id": { 90 | "type": "long" 91 | }, 92 | "source": { 93 | "type": "string" 94 | }, 95 | "title": { 96 | "type": "string" 97 | }, 98 | "url": { 99 | "type": "string" 100 | } 101 | } 102 | } 103 | } 104 | ``` 105 | 106 | ### 查看mapping 107 | 108 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-mapping.html 109 | 110 | 111 | ``` 112 | GET http://127.0.0.1:9200/_all/_mapping 113 | 114 | GET http://127.0.0.1:9200/_mapping 115 | ``` 116 | 117 | ``` 118 | GET http://127.0.0.1:9200/news/_mapping/article 119 | ``` 120 | 121 | 输出: 122 | 123 | ``` 124 | { 125 | "news": { 126 | "mappings": { 127 | "article": { 128 | "properties": { 129 | "author": { 130 | "type": "string" 131 | }, 132 | "content": { 133 | "type": "string" 134 | }, 135 | "id": { 136 | "type": "long" 137 | }, 138 | "pubdate": { 139 | "type": "date", 140 | "store": true, 141 | "format": "yyyy-MM-dd HH:mm:ss" 142 | }, 143 | "source": { 144 | "type": "string" 145 | }, 146 | "title": { 147 | "type": "string" 148 | }, 149 | "url": { 150 | "type": "string" 151 | } 152 | } 153 | } 154 | } 155 | } 156 | } 157 | ``` 158 | 159 | ### 删除mapping 160 | 161 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-delete-mapping.html 162 | 163 | ``` 164 | [DELETE] /{index}/{type} 165 | 166 | [DELETE] /{index}/{type}/_mapping 167 | 168 | [DELETE] /{index}/_mapping/{type} 169 | ``` 170 | 171 | ``` 172 | DELETE http://127.0.0.1:9200/news/_mapping/article 173 | ``` 174 
| 175 | ### ansj分词器测试 176 | 177 | http://127.0.0.1:9200/news/_analyze?analyzer=ansj_index&text=习近平 178 | 179 | http://127.0.0.1:9200/news/_analyze?analyzer=ansj_index&text=我是中国人 180 | 181 | http://127.0.0.1:9200/news/_analyze?analyzer=ansj_index&text=汪东兴同志遗体在京火化汪东兴同志病重期间和逝世后,习近平李克强张德江俞正声刘云山王岐山张高丽江泽民胡锦涛等同志,前往医院看望或通过各种形式对汪东兴同志逝世表示沉痛哀悼并向其亲属表示深切慰问新华社北京8月27日电中国共产党的优秀党员 182 | 183 | ### ansj分词器查询 184 | 185 | * 普通查询 186 | 187 | http://127.0.0.1:9200/news/_search?q=习近平&analyzer=ansj_index&size=50 188 | 189 | * 指定term查询 190 | 191 | http://127.0.0.1:9200/news/_search?q=content:江泽民&analyzer=ansj_index&size=50 192 | 193 | http://127.0.0.1:9200/news/_search?q=title:江泽民&analyzer=ansj_index&size=50 194 | 195 | http://127.0.0.1:9200/news/_search?q=source:新华网&analyzer=ansj_index&size=50 196 | 197 | * 其中`ansj_index`为在`elasticsearch.yml`文件中配置的`ansj`分词器 198 | 199 | [elasticsearch rest api 快速上手](https://github.com/ameizi/elasticsearch/issues/5) 200 | 201 | ### elasticsearch-jdbc 202 | 203 | ```bash 204 | @echo off 205 | 206 | set DIR=%~dp0 207 | set LIB="%DIR%\..\lib\*" 208 | set BIN="%DIR%\..\bin\*" 209 | 210 | REM ??? 211 | echo {^ 212 | "type" : "jdbc",^ 213 | "jdbc" : {^ 214 | "url" : "jdbc:mysql://localhost:3306/news",^ 215 | "user" : "root",^ 216 | "password" : "root",^ 217 | "schedule" : "0 0/15 * ? 
* *",^ 218 | "sql" : [^ 219 | {"statement":"SELECT title,content,url,source,author,pubdate FROM news"},^ 220 | {^ 221 | "statement":"SELECT title,content,url,source,author,pubdate FROM news where pubdate > ?",^ 222 | "parameter" : [ "$metrics.lastexecutionstart" ]^ 223 | }^ 224 | ],^ 225 | "autocommit" : true,^ 226 | "treat_binary_as_string" : true,^ 227 | "elasticsearch" : {^ 228 | "cluster" : "elasticsearch",^ 229 | "host" : "localhost",^ 230 | "port" : 9300^ 231 | },^ 232 | "index" : "news",^ 233 | "type" : "article"^ 234 | }^ 235 | }^ | "%JAVA_HOME%\bin\java" -cp "%LIB%" -Dlog4j.configurationFile="file://%DIR%\log4j2.xml" "org.xbib.tools.Runner" "org.xbib.tools.JDBCImporter" 236 | ``` 237 | 238 | [elasticsearch-jdbc 插件的使用](https://github.com/ameizi/elasticsearch/issues/3) 239 | -------------------------------------------------------------------------------- /elasticsearch快速上手.md: -------------------------------------------------------------------------------- 1 | # elasticsearch rest api 快速上手 2 | 3 | > 注:本文档中除特别说明外,请求方式均为`GET`。所有的请求均在`Sense`中测试通过 4 | 5 | 遵循的格式为 6 | 7 | ``` 8 | curl -X{VERB} {host}:{port}/{index}/{type}/{id} 9 | ``` 10 | 11 | ### 集群健康查看 12 | 13 | * http://127.0.0.1:9200/_cat/health?v 14 | 15 | ``` 16 | epoch timestamp cluster status node.total node.data shards pri relo init unassign pending_tasks 17 | 1441940569 11:02:49 elasticsearch yellow 1 1 7 7 0 0 7 0 18 | ``` 19 | 20 | * http://127.0.0.1:9200/_cat/nodes?v 21 | 22 | ``` 23 | host ip heap.percent ram.percent load node.role master name 24 | acer 169.254.9.202 32 52 d * Mys-Tech 25 | ``` 26 | 27 | ### 列出所有的indices 28 | 29 | * http://127.0.0.1:9200/_cat/indices?v 30 | 31 | ``` 32 | health status index pri rep docs.count docs.deleted store.size pri.store.size 33 | yellow open .marvel-2015.09.11 1 1 3233 0 10.5mb 10.5mb 34 | yellow open .marvel-2015.09.10 1 1 1996 0 3.9mb 3.9mb 35 | yellow open news 5 1 3455 0 17.8mb 17.8mb 36 | ``` 37 | 38 | ### 创建索引 39 | 40 | 使用`PUT`请求创建一个countries的索引 41 | 42 | ``` 43 | curl -XPUT 
http://127.0.0.1:9200/countries?pretty 44 | ``` 45 | 46 | 输出: 47 | 48 | ``` 49 | { 50 | "acknowledged": true 51 | } 52 | ``` 53 | 54 | 查看索引列表 55 | 56 | ``` 57 | curl -XGET http://127.0.0.1:9200/_cat/indices?v 58 | ``` 59 | 60 | 输出: 61 | 62 | ``` 63 | health status index pri rep docs.count docs.deleted store.size pri.store.size 64 | yellow open countries 5 1 0 0 575b 575b 65 | yellow open .marvel-2015.09.11 1 1 3436 0 11.4mb 11.4mb 66 | yellow open .marvel-2015.09.10 1 1 1996 0 3.9mb 3.9mb 67 | yellow open news 5 1 3455 0 17.8mb 17.8mb 68 | ``` 69 | 70 | ### 索引文档 71 | 72 | * 使用自定义id索引文档 73 | 74 | 使用`PUT`请求创建一个索引为`countries`类型为`country`的文档。其文档编号为`1`,文档内容包含`name`和`capital` 75 | 76 | ``` 77 | curl -XPUT http://127.0.0.1:9200/countries/country/1?pretty -d ' 78 | { 79 | "name": "中国", 80 | "capital": "北京" 81 | }' 82 | ``` 83 | 84 | 输出: 85 | 86 | ``` 87 | { 88 | "_index": "countries", 89 | "_type": "country", 90 | "_id": "1", 91 | "_version": 1, 92 | "created": true 93 | } 94 | ``` 95 | 96 | * 使用系统分配的id索引文档 97 | 98 | ``` 99 | curl -XPOST http://127.0.0.1:9200/countries/country?pretty -d ' 100 | { 101 | "name": "韩国", 102 | "capital": "首尔" 103 | }' 104 | ``` 105 | 注意:使用系统分配的id时使用`POST`方式提交文档,且在`索引\类型`url格式中不再有id 106 | 107 | 输出: 108 | 109 | ``` 110 | { 111 | "_index": "countries", 112 | "_type": "country", 113 | "_id": "AU-6awteDgxJZYVN-E5I", 114 | "_version": 1, 115 | "created": true 116 | } 117 | ``` 118 | 119 | ### 查询文档 120 | 121 | 使用自定义id查询文档 122 | 123 | ``` 124 | curl -XGET http://127.0.0.1:9200/countries/country/1?pretty 125 | ``` 126 | 127 | 输出: 128 | 129 | ``` 130 | { 131 | "_index": "countries", 132 | "_type": "country", 133 | "_id": "1", 134 | "_version": 1, 135 | "found": true, 136 | "_source": { 137 | "name": "中国", 138 | "capital": "北京" 139 | } 140 | } 141 | ``` 142 | 143 | 使用系统分配的id查询 144 | 145 | ``` 146 | GET http://127.0.0.1:9200/countries/country/AU-6awteDgxJZYVN-E5I?pretty 147 | 148 | ``` 149 | 输出: 150 | 151 | ``` 152 | { 153 | "_index": "countries", 154 | 
"_type": "country", 155 | "_id": "AU-6awteDgxJZYVN-E5I", 156 | "_version": 1, 157 | "found": true, 158 | "_source": { 159 | "name": "韩国", 160 | "capital": "首尔" 161 | } 162 | } 163 | ``` 164 | 165 | ### 查看索引信息 166 | 167 | ``` 168 | GET http://127.0.0.1:9200/countries/ 169 | ``` 170 | 171 | 输出: 172 | 173 | ``` 174 | { 175 | "countries": { 176 | "aliases": {}, 177 | "mappings": { 178 | "country": { 179 | "properties": { 180 | "capital": { 181 | "type": "string" 182 | }, 183 | "name": { 184 | "type": "string" 185 | } 186 | } 187 | } 188 | }, 189 | "settings": { 190 | "index": { 191 | "creation_date": "1441941497754", 192 | "uuid": "UaoQ_WCATaiy5w736cjw2A", 193 | "number_of_replicas": "1", 194 | "number_of_shards": "5", 195 | "version": { 196 | "created": "1070199" 197 | } 198 | } 199 | }, 200 | "warmers": {} 201 | } 202 | } 203 | ``` 204 | 205 | ### 删除索引 206 | 207 | 删除`myindex`索引 208 | 209 | ``` 210 | DELETE http://127.0.0.1:9200/myindex/?pretty 211 | ``` 212 | 输出: 213 | 214 | ``` 215 | { 216 | "acknowledged": true 217 | } 218 | ``` 219 | 220 | ### 索引或替换一个文档 221 | 222 | 根据文档id索引或替换文档,若存在则修改替换,否则索引该文档。 223 | 224 | * 使用已存在的id 225 | 226 | 修改文档id为1的国家信息。 227 | 228 | ``` 229 | PUT 'http://127.0.0.1:9200/countries/country/1?pretty' 230 | { 231 | "name": "日本", 232 | "capital": "东京" 233 | } 234 | ``` 235 | 236 | 查询其是否已修改 237 | 238 | ``` 239 | GET http://127.0.0.1:9200/countries/country/1?pretty 240 | ``` 241 | 242 | 输出: 243 | 244 | ``` 245 | { 246 | "_index": "countries", 247 | "_type": "country", 248 | "_id": "1", 249 | "_version": 2, 250 | "found": true, 251 | "_source": { 252 | "name": "日本", 253 | "capital": "东京" 254 | } 255 | } 256 | ``` 257 | 258 | 可见国家信息已由`中国`变为`日本`,其首都信息也发生了变化 259 | 260 | * 使用不存在的id则是索引文档 261 | 262 | ``` 263 | PUT http://127.0.0.1:9200/countries/country/2?pretty 264 | { 265 | "name": "澳大利亚", 266 | "capital": "悉尼" 267 | } 268 | ``` 269 | 270 | 输出: 271 | 272 | ``` 273 | { 274 | "_index": "countries", 275 | "_type": "country", 276 | "_id": "2", 277 | 
"_version": 1, 278 | "created": true 279 | } 280 | ``` 281 | 282 | ### 修改文档 283 | 284 | * 按doc方式更新文档 285 | 286 | 以`doc`方式修改文档id为1的文档 287 | 288 | ``` 289 | POST http://127.0.0.1:9200/countries/country/1/_update?pretty 290 | { 291 | "doc": { "name": "美国","capital": "华盛顿"} 292 | } 293 | ``` 294 | 其中`doc`是固定写法,其内容为要修改的文档内容 295 | 296 | * 按script方式更新文档 297 | 298 | 以`script`方式修改文档id为1的文档 299 | 300 | ``` 301 | POST http://127.0.0.1:9200/countries/country/1/_update?pretty 302 | { 303 | "script": "ctx._source.name=\"加拿大\";ctx._source.capital=\"渥太华\"" 304 | } 305 | ``` 306 | 307 | ### 删除文档 308 | 309 | * 按文档id删除 310 | 311 | ``` 312 | DELETE http://127.0.0.1:9200/countries/country/1?pretty 313 | ``` 314 | 315 | 输出: 316 | 317 | ``` 318 | { 319 | "found": true, 320 | "_index": "countries", 321 | "_type": "country", 322 | "_id": "1", 323 | "_version": 6 324 | } 325 | ``` 326 | 327 | 328 | * 根据查询结果删除 329 | 330 | ``` 331 | DELETE http://127.0.0.1:9200/countries/country/_query?pretty 332 | { 333 | "query": { "match": { "name": "美国" } } 334 | } 335 | 336 | ``` 337 | 338 | 输出: 339 | 340 | ``` 341 | { 342 | "_indices": { 343 | "countries": { 344 | "_shards": { 345 | "total": 5, 346 | "successful": 5, 347 | "failed": 0 348 | } 349 | } 350 | } 351 | } 352 | ``` 353 | 354 | 查询是否还有name为美国的文档 355 | 356 | ``` 357 | GET http://127.0.0.1:9200/countries/country/_query 358 | { 359 | "query": { "match_all": { "name": "美国" } } 360 | } 361 | ``` 362 | 363 | 364 | ### 批量处理 365 | 366 | _bulk api 367 | 368 | https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html 369 | 370 | 遵循格式 371 | 372 | /_bulk, /{index}/_bulk, {index}/{type}/_bulk 373 | 374 | ``` 375 | action_and_meta_data\n 376 | optional_source\n 377 | action_and_meta_data\n 378 | optional_source\n 379 | ``` 380 | 381 | 支持的action有`index`, `create`, `delete`, `update` 382 | 383 | `index`和`create`在下一行跟上要索引的doc 384 | `delete`则不需要 385 | `update`在下一行跟上`doc`或`script` 386 | 387 | * 批量索引文档 388 | 389 | ``` 390 | POST 
http://127.0.0.1:9200/countries/country/_bulk?pretty 391 | {"index":{"_id":"1"}} 392 | {"name": "中国","capital": "北京"} 393 | {"index":{"_id":"2"}} 394 | {"name": "美国","capital": "华盛顿"} 395 | {"index":{"_id":"3"}} 396 | {"name": "日本","capital": "东京"} 397 | {"index":{"_id":"4"}} 398 | {"name": "澳大利亚","capital": "悉尼"} 399 | {"index":{"_id":"5"}} 400 | {"name": "印度","capital": "新德里"} 401 | {"index":{"_id":"6"}} 402 | {"name": "韩国","capital": "首尔"} 403 | ``` 404 | 405 | 以上请求将会批量索引6个文档。 406 | 407 | 输出: 408 | 409 | ``` 410 | { 411 | "took": 4, 412 | "errors": false, 413 | "items": [ 414 | { 415 | "index": { 416 | "_index": "countries", 417 | "_type": "country", 418 | "_id": "1", 419 | "_version": 1, 420 | "status": 201 421 | } 422 | }, 423 | { 424 | "index": { 425 | "_index": "countries", 426 | "_type": "country", 427 | "_id": "2", 428 | "_version": 2, 429 | "status": 200 430 | } 431 | }, 432 | { 433 | "index": { 434 | "_index": "countries", 435 | "_type": "country", 436 | "_id": "3", 437 | "_version": 1, 438 | "status": 201 439 | } 440 | }, 441 | { 442 | "index": { 443 | "_index": "countries", 444 | "_type": "country", 445 | "_id": "4", 446 | "_version": 1, 447 | "status": 201 448 | } 449 | }, 450 | { 451 | "index": { 452 | "_index": "countries", 453 | "_type": "country", 454 | "_id": "5", 455 | "_version": 1, 456 | "status": 201 457 | } 458 | }, 459 | { 460 | "index": { 461 | "_index": "countries", 462 | "_type": "country", 463 | "_id": "6", 464 | "_version": 1, 465 | "status": 201 466 | } 467 | } 468 | ] 469 | } 470 | ``` 471 | 472 | * 批量执行,含index、create、delete、update 473 | 474 | ``` 475 | POST http://127.0.0.1:9200/countries/country/_bulk?pretty 476 | {"index":{"_id":"7"}} 477 | {"name": "新加坡","capital": "渥太华"} 478 | {"create":{"_id":"8"}} 479 | {"name": "德国","capital": "柏林"} 480 | {"update":{"_id":"1"}} 481 | {"doc": {"name": "法国","capital": "巴黎" }} 482 | {"update":{"_id":"3"}} 483 | {"script": "ctx._source.name = \"法国\";ctx._source.capital = \"巴黎\""} 484 | 
{"delete":{"_id":"2"}} 485 | ``` 486 | 487 | 输出: 488 | 489 | ``` 490 | { 491 | "took": 40, 492 | "errors": false, 493 | "items": [ 494 | { 495 | "index": { 496 | "_index": "countries", 497 | "_type": "country", 498 | "_id": "7", 499 | "_version": 1, 500 | "status": 201 501 | } 502 | }, 503 | { 504 | "create": { 505 | "_index": "countries", 506 | "_type": "country", 507 | "_id": "8", 508 | "_version": 1, 509 | "status": 201 510 | } 511 | }, 512 | { 513 | "update": { 514 | "_index": "countries", 515 | "_type": "country", 516 | "_id": "1", 517 | "_version": 2, 518 | "status": 200 519 | } 520 | }, 521 | { 522 | "update": { 523 | "_index": "countries", 524 | "_type": "country", 525 | "_id": "3", 526 | "_version": 2, 527 | "status": 200 528 | } 529 | }, 530 | { 531 | "delete": { 532 | "_index": "countries", 533 | "_type": "country", 534 | "_id": "2", 535 | "_version": 3, 536 | "status": 200, 537 | "found": true 538 | } 539 | } 540 | ] 541 | } 542 | ``` 543 | 544 | * 导入数据 545 | 546 | countries.json 547 | 548 | ``` 549 | {"index":{"_id":"1"}} 550 | {"name": "新加坡","capital": "渥太华"} 551 | {"index":{"_id":"2"}} 552 | {"name": "韩国","capital": "首尔"} 553 | {"index":{"_id":"3"}} 554 | {"name": "朝鲜","capital": "平壤"} 555 | {"index":{"_id":"4"}} 556 | {"name": "日本","capital": "东京"} 557 | {"index":{"_id":"5"}} 558 | {"name": "马来西亚","capital": "吉隆坡"} 559 | ``` 560 | 561 | 使用curl的`--data-binary`参数导入数据 562 | 563 | ``` 564 | curl -XPOST http://127.0.0.1:9200/countries/country/_bulk?pretty --data-binary @countries.json 565 | ``` 566 | 567 | 或者使用postman导入 568 | 569 | ``` 570 | http://127.0.0.1:9200/countries/country/_bulk?pretty 571 | {"index":{"_id":"1"}} 572 | {"name": "新加坡","capital": "渥太华"} 573 | {"index":{"_id":"2"}} 574 | {"name": "韩国","capital": "首尔"} 575 | {"index":{"_id":"3"}} 576 | {"name": "朝鲜","capital": "平壤"} 577 | {"index":{"_id":"4"}} 578 | {"name": "日本","capital": "东京"} 579 | {"index":{"_id":"5"}} 580 | {"name": "马来西亚","capital": "吉隆坡"} 581 | ``` 582 | 583 | ### search api 
584 | 585 | * GET方式搜索(queryString) 586 | 587 | ``` 588 | GET http://127.0.0.1:9200/countries/_search?q=*&pretty 589 | ``` 590 | 591 | 注:`q=*`将匹配索引中的所有文档 592 | 593 | 输出: 594 | 595 | ``` 596 | { 597 | "took": 1, 598 | "timed_out": false, 599 | "_shards": { 600 | "total": 5, 601 | "successful": 5, 602 | "failed": 0 603 | }, 604 | "hits": { 605 | "total": 10, 606 | "max_score": 1, 607 | "hits": [ 608 | { 609 | "_index": "countries", 610 | "_type": "country", 611 | "_id": "4", 612 | "_score": 1, 613 | "_source": { 614 | "name": "日本", 615 | "capital": "东京" 616 | } 617 | }, 618 | { 619 | "_index": "countries", 620 | "_type": "country", 621 | "_id": "_query", 622 | "_score": 1, 623 | "_source": { 624 | "query": { 625 | "match_all": { 626 | "name": "美国" 627 | } 628 | } 629 | } 630 | }, 631 | { 632 | "_index": "countries", 633 | "_type": "country", 634 | "_id": "5", 635 | "_score": 1, 636 | "_source": { 637 | "name": "印度", 638 | "capital": "新德里" 639 | } 640 | }, 641 | { 642 | "_index": "countries", 643 | "_type": "country", 644 | "_id": "6", 645 | "_score": 1, 646 | "_source": { 647 | "name": "韩国", 648 | "capital": "首尔" 649 | } 650 | }, 651 | { 652 | "_index": "countries", 653 | "_type": "country", 654 | "_id": "1", 655 | "_score": 1, 656 | "_source": { 657 | "name": "新加坡", 658 | "capital": "渥太华" 659 | } 660 | }, 661 | { 662 | "_index": "countries", 663 | "_type": "country", 664 | "_id": "7", 665 | "_score": 1, 666 | "_source": { 667 | "name": "新加坡", 668 | "capital": "渥太华" 669 | } 670 | }, 671 | { 672 | "_index": "countries", 673 | "_type": "country", 674 | "_id": "2", 675 | "_score": 1, 676 | "_source": { 677 | "name": "韩国", 678 | "capital": "首尔" 679 | } 680 | }, 681 | { 682 | "_index": "countries", 683 | "_type": "country", 684 | "_id": "AU-6awteDgxJZYVN-E5I", 685 | "_score": 1, 686 | "_source": { 687 | "name": "韩国", 688 | "capital": "首尔" 689 | } 690 | }, 691 | { 692 | "_index": "countries", 693 | "_type": "country", 694 | "_id": "8", 695 | "_score": 1, 696 | "_source": { 
697 | "name": "德国", 698 | "capital": "柏林" 699 | } 700 | }, 701 | { 702 | "_index": "countries", 703 | "_type": "country", 704 | "_id": "3", 705 | "_score": 1, 706 | "_source": { 707 | "name": "朝鲜", 708 | "capital": "平壤" 709 | } 710 | } 711 | ] 712 | } 713 | } 714 | ``` 715 | 716 | * POST方式搜索(含请求体query) 717 | 718 | ``` 719 | POST http://127.0.0.1:9200/countries/_search?pretty 720 | { 721 | "query": { 722 | "match_all": {} 723 | } 724 | } 725 | ``` 726 | 727 | 输出: 728 | 729 | ``` 730 | { 731 | "took": 1, 732 | "timed_out": false, 733 | "_shards": { 734 | "total": 5, 735 | "successful": 5, 736 | "failed": 0 737 | }, 738 | "hits": { 739 | "total": 10, 740 | "max_score": 1, 741 | "hits": [ 742 | { 743 | "_index": "countries", 744 | "_type": "country", 745 | "_id": "4", 746 | "_score": 1, 747 | "_source": { 748 | "name": "日本", 749 | "capital": "东京" 750 | } 751 | }, 752 | { 753 | "_index": "countries", 754 | "_type": "country", 755 | "_id": "_query", 756 | "_score": 1, 757 | "_source": { 758 | "query": { 759 | "match_all": { 760 | "name": "美国" 761 | } 762 | } 763 | } 764 | }, 765 | { 766 | "_index": "countries", 767 | "_type": "country", 768 | "_id": "5", 769 | "_score": 1, 770 | "_source": { 771 | "name": "印度", 772 | "capital": "新德里" 773 | } 774 | }, 775 | { 776 | "_index": "countries", 777 | "_type": "country", 778 | "_id": "6", 779 | "_score": 1, 780 | "_source": { 781 | "name": "韩国", 782 | "capital": "首尔" 783 | } 784 | }, 785 | { 786 | "_index": "countries", 787 | "_type": "country", 788 | "_id": "1", 789 | "_score": 1, 790 | "_source": { 791 | "name": "新加坡", 792 | "capital": "渥太华" 793 | } 794 | }, 795 | { 796 | "_index": "countries", 797 | "_type": "country", 798 | "_id": "7", 799 | "_score": 1, 800 | "_source": { 801 | "name": "新加坡", 802 | "capital": "渥太华" 803 | } 804 | }, 805 | { 806 | "_index": "countries", 807 | "_type": "country", 808 | "_id": "2", 809 | "_score": 1, 810 | "_source": { 811 | "name": "韩国", 812 | "capital": "首尔" 813 | } 814 | }, 815 | { 816 | 
"_index": "countries", 817 | "_type": "country", 818 | "_id": "AU-6awteDgxJZYVN-E5I", 819 | "_score": 1, 820 | "_source": { 821 | "name": "韩国", 822 | "capital": "首尔" 823 | } 824 | }, 825 | { 826 | "_index": "countries", 827 | "_type": "country", 828 | "_id": "8", 829 | "_score": 1, 830 | "_source": { 831 | "name": "德国", 832 | "capital": "柏林" 833 | } 834 | }, 835 | { 836 | "_index": "countries", 837 | "_type": "country", 838 | "_id": "3", 839 | "_score": 1, 840 | "_source": { 841 | "name": "朝鲜", 842 | "capital": "平壤" 843 | } 844 | } 845 | ] 846 | } 847 | } 848 | ``` 849 | 850 | * 限定返回条目 851 | 852 | ``` 853 | POST http://127.0.0.1:9200/countries/_search?pretty 854 | { 855 | "query": { 856 | "match_all": {} 857 | }, 858 | "size": 1 859 | } 860 | ``` 861 | 862 | `size`控制返回条目,默认为10 863 | 864 | 输出: 865 | 866 | ``` 867 | { 868 | "took": 1, 869 | "timed_out": false, 870 | "_shards": { 871 | "total": 5, 872 | "successful": 5, 873 | "failed": 0 874 | }, 875 | "hits": { 876 | "total": 10, 877 | "max_score": 1, 878 | "hits": [ 879 | { 880 | "_index": "countries", 881 | "_type": "country", 882 | "_id": "4", 883 | "_score": 1, 884 | "_source": { 885 | "name": "日本", 886 | "capital": "东京" 887 | } 888 | } 889 | ] 890 | } 891 | } 892 | ``` 893 | 894 | * 分页(from,size) 895 | 896 | ``` 897 | POST http://127.0.0.1:9200/countries/_search?pretty 898 | { 899 | "query": { 900 | "match_all": {} 901 | }, 902 | "from": 2, 903 | "size": 2 904 | } 905 | ``` 906 | 使用`from`和`size`来翻页。其中`from`默认为`0`,`size`默认为`10` 907 | 908 | * 排序(sort) 909 | 910 | ``` 911 | POST http://127.0.0.1:9200/countries/_search?pretty 912 | { 913 | "query": { 914 | "match_all": {} 915 | }, 916 | "sort": [ 917 | { 918 | "name": { 919 | "order": "desc" 920 | } 921 | } 922 | ] 923 | } 924 | ``` 925 | 其中`name`为排序字段 926 | 927 | ### 限定返回字段 928 | 929 | ``` 930 | POST http://127.0.0.1:9200/countries/_search?pretty 931 | { 932 | "query": { 933 | "match_all": {} 934 | }, 935 | "_source": ["name"] 936 | } 937 | ``` 938 | 
使用`_source`来限定返回的字段。这里只返回`name` 939 | 940 | 941 | 942 | ### 高级查询 943 | 944 | * match_phrase 945 | 946 | ``` 947 | POST http://127.0.0.1:9200/countries/_search?pretty 948 | { 949 | "query": { 950 | "match_phrase": { 951 | "name": "韩国" 952 | } 953 | } 954 | } 955 | ``` 956 | 957 | * must 958 | 959 | ``` 960 | POST http://127.0.0.1:9200/countries/_search?pretty 961 | { 962 | "query": { 963 | "bool": { 964 | "must": [ 965 | {"match": { 966 | "name": "日本" 967 | }}, 968 | {"match": { 969 | "capital": "东京" 970 | }} 971 | ] 972 | } 973 | } 974 | } 975 | ``` 976 | 977 | * should 978 | 979 | ``` 980 | POST http://127.0.0.1:9200/countries/_search?pretty 981 | { 982 | "query": { 983 | "bool": { 984 | "should": [ 985 | {"match": { 986 | "name": "日本" 987 | }}, 988 | {"match": { 989 | "name": "韩国" 990 | }} 991 | ] 992 | } 993 | } 994 | } 995 | ``` 996 | * must_not 997 | ``` 998 | POST http://127.0.0.1:9200/countries/_search?pretty 999 | { 1000 | "query": { 1001 | "bool": { 1002 | "must_not": [ 1003 | {"match": { 1004 | "name": "日本" 1005 | }}, 1006 | {"match": { 1007 | "name": "韩国" 1008 | }} 1009 | ] 1010 | } 1011 | } 1012 | } 1013 | ``` 1014 | 1015 | * filter 1016 | 1017 | ``` 1018 | POST http://127.0.0.1:9200/countries/_search?pretty 1019 | { 1020 | "query": { 1021 | "match_all": {} 1022 | }, 1023 | "filter": { 1024 | "term": { 1025 | "capital": "东京" 1026 | } 1027 | } 1028 | } 1029 | ``` 1030 | 1031 | * 聚合(aggs) 1032 | 1033 | ``` 1034 | POST http://127.0.0.1:9200/countries/_search?pretty 1035 | { 1036 | "size": 0, 1037 | "aggs": { 1038 | "group_by_name": { 1039 | "terms": { 1040 | "field": "name" 1041 | } 1042 | } 1043 | } 1044 | } 1045 | ``` 1046 | 按`name`统计分组文档数 1047 | 1048 | 输出: 1049 | 1050 | ``` 1051 | { 1052 | "took": 1, 1053 | "timed_out": false, 1054 | "_shards": { 1055 | "total": 5, 1056 | "successful": 5, 1057 | "failed": 0 1058 | }, 1059 | "hits": { 1060 | "total": 10, 1061 | "max_score": 0, 1062 | "hits": [] 1063 | }, 1064 | "aggregations": { 1065 | "group_by_name": { 
1066 | "doc_count_error_upper_bound": 0, 1067 | "sum_other_doc_count": 0, 1068 | "buckets": [ 1069 | { 1070 | "key": "韩国", 1071 | "doc_count": 3 1072 | }, 1073 | { 1074 | "key": "新加坡", 1075 | "doc_count": 2 1076 | }, 1077 | { 1078 | "key": "印度", 1079 | "doc_count": 1 1080 | }, 1081 | { 1082 | "key": "德国", 1083 | "doc_count": 1 1084 | }, 1085 | { 1086 | "key": "日本", 1087 | "doc_count": 1 1088 | }, 1089 | { 1090 | "key": "朝鲜", 1091 | "doc_count": 1 1092 | } 1093 | ] 1094 | } 1095 | } 1096 | } 1097 | ``` 1098 | 1099 | ### 高亮查询(highlight) 1100 | 1101 | ``` 1102 | POST http://127.0.0.1:9200/news/_search?q=李克强 1103 | { 1104 | "query" : { 1105 | match_all:{} 1106 | }, 1107 | "highlight" : { 1108 | "pre_tags" : ["", "", ""], 1109 | "post_tags" : ["", "", ""], 1110 | "fields" : [ 1111 | {"title" : {}}, 1112 | {"content" : { 1113 | "fragment_size" : 350, 1114 | "number_of_fragments" : 3, 1115 | "no_match_size": 150 1116 | }} 1117 | ] 1118 | } 1119 | } 1120 | ``` 1121 | 1122 | ``` 1123 | POST http://127.0.0.1:9200/news/_search?q=李克强 1124 | { 1125 | "query" : { 1126 | match_all:{} 1127 | }, 1128 | "highlight" : { 1129 | "pre_tags" : [""], 1130 | "post_tags" : [""], 1131 | "fields" : [ 1132 | {"title" : {}}, 1133 | {"content" : { 1134 | "fragment_size" : 350, 1135 | "number_of_fragments" : 3, 1136 | "no_match_size": 150 1137 | }} 1138 | ] 1139 | } 1140 | } 1141 | ``` 1142 | 1143 | ### 删除索引 1144 | 1145 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-delete-index.html 1146 | 1147 | ``` 1148 | DELETE http://127.0.0.1:9200/news 1149 | ``` 1150 | 1151 | ### 创建索引 1152 | 1153 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html 1154 | 1155 | ``` 1156 | PUT http://127.0.0.1:9200/news 1157 | ``` 1158 | 1159 | ### 创建或修改mapping 1160 | 1161 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-put-mapping.html 1162 | 1163 | ``` 1164 | PUT /{index}/_mapping/{type} 1165 | ``` 1166 | 1167 | ``` 1168 | PUT 
http://127.0.0.1:9200/news/_mapping/article 1169 | { 1170 | "article": { 1171 | "properties": { 1172 | "pubdate": { 1173 | "type": "date", 1174 | "format": "dateOptionalTime" 1175 | }, 1176 | "author": { 1177 | "type": "string" 1178 | }, 1179 | "content": { 1180 | "type": "string" 1181 | }, 1182 | "id": { 1183 | "type": "long" 1184 | }, 1185 | "source": { 1186 | "type": "string" 1187 | }, 1188 | "title": { 1189 | "type": "string" 1190 | }, 1191 | "url": { 1192 | "type": "string" 1193 | } 1194 | } 1195 | } 1196 | } 1197 | ``` 1198 | 1199 | ### 查看mapping 1200 | 1201 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-mapping.html 1202 | 1203 | 1204 | ``` 1205 | GET http://127.0.0.1:9200/_all/_mapping 1206 | 1207 | GET http://127.0.0.1:9200/_mapping 1208 | ``` 1209 | 1210 | ``` 1211 | GET http://127.0.0.1:9200/news/_mapping/article 1212 | ``` 1213 | 1214 | 输出: 1215 | 1216 | ``` 1217 | { 1218 | "news": { 1219 | "mappings": { 1220 | "article": { 1221 | "properties": { 1222 | "author": { 1223 | "type": "string" 1224 | }, 1225 | "content": { 1226 | "type": "string" 1227 | }, 1228 | "id": { 1229 | "type": "long" 1230 | }, 1231 | "pubdate": { 1232 | "type": "date", 1233 | "store": true, 1234 | "format": "yyyy-MM-dd HH:mm:ss" 1235 | }, 1236 | "source": { 1237 | "type": "string" 1238 | }, 1239 | "title": { 1240 | "type": "string" 1241 | }, 1242 | "url": { 1243 | "type": "string" 1244 | } 1245 | } 1246 | } 1247 | } 1248 | } 1249 | } 1250 | ``` 1251 | 1252 | ### 删除mapping 1253 | 1254 | https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-delete-mapping.html 1255 | 1256 | ``` 1257 | [DELETE] /{index}/{type} 1258 | 1259 | [DELETE] /{index}/{type}/_mapping 1260 | 1261 | [DELETE] /{index}/_mapping/{type} 1262 | ``` 1263 | 1264 | ``` 1265 | DELETE http://127.0.0.1:9200/news/_mapping/article 1266 | ``` 1267 | 1268 | ### ansj分词器测试 1269 | 1270 | http://127.0.0.1:9200/news/_analyze?analyzer=ansj_index&text=习近平 1271 | 1272 | 
http://127.0.0.1:9200/news/_analyze?analyzer=ansj_index&text=我是中国人 1273 | 1274 | http://127.0.0.1:9200/news/_analyze?analyzer=ansj_index&text=汪东兴同志遗体在京火化汪东兴同志病重期间和逝世后,习近平李克强张德江俞正声刘云山王岐山张高丽江泽民胡锦涛等同志,前往医院看望或通过各种形式对汪东兴同志逝世表示沉痛哀悼并向其亲属表示深切慰问新华社北京8月27日电中国共产党的优秀党员 1275 | 1276 | ### ansj分词器查询 1277 | 1278 | * 普通查询 1279 | 1280 | http://127.0.0.1:9200/news/_search?q=习近平&analyzer=ansj_index&size=50 1281 | 1282 | * 指定term查询 1283 | 1284 | http://127.0.0.1:9200/news/_search?q=content:江泽民&analyzer=ansj_index&size=50 1285 | 1286 | http://127.0.0.1:9200/news/_search?q=title:江泽民&analyzer=ansj_index&size=50 1287 | 1288 | http://127.0.0.1:9200/news/_search?q=source:新华网&analyzer=ansj_index&size=50 1289 | 1290 | * 其中`ansj_index`为在`elasticsearch.yml`文件中配置的`ansj`分词器 1291 | -------------------------------------------------------------------------------- /news.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | set DIR=%~dp0 4 | set LIB="%DIR%\..\lib\*" 5 | set BIN="%DIR%\..\bin\*" 6 | 7 | REM ??? 8 | echo {^ 9 | "type" : "jdbc",^ 10 | "jdbc" : {^ 11 | "url" : "jdbc:mysql://localhost:3306/news",^ 12 | "user" : "root",^ 13 | "password" : "root",^ 14 | "schedule" : "0 0/15 * ? 
* *",^ 15 | "sql" : [^ 16 | {"statement":"SELECT title,content,url,source,author,pubdate FROM news"},^ 17 | {^ 18 | "statement":"SELECT title,content,url,source,author,pubdate FROM news where pubdate > ?",^ 19 | "parameter" : [ "$metrics.lastexecutionstart" ]^ 20 | }^ 21 | ],^ 22 | "autocommit" : true,^ 23 | "treat_binary_as_string" : true,^ 24 | "elasticsearch" : {^ 25 | "cluster" : "elasticsearch",^ 26 | "host" : "localhost",^ 27 | "port" : 9300^ 28 | },^ 29 | "index" : "news",^ 30 | "type" : "article"^ 31 | }^ 32 | }^ | "%JAVA_HOME%\bin\java" -cp "%LIB%" -Dlog4j.configurationFile="file://%DIR%\log4j2.xml" "org.xbib.tools.Runner" "org.xbib.tools.JDBCImporter" -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | net.aimeizi 6 | elasticsearch-jest-example 7 | 1.0 8 | war 9 | 10 | jest 11 | http://maven.apache.org 12 | 13 | 14 | UTF-8 15 | 16 | 17 | 18 | 19 | elasticsearch-releases 20 | http://maven.elasticsearch.org/releases 21 | 22 | true 23 | 24 | 25 | false 26 | 27 | 28 | 29 | 30 | 31 | 32 | javax.servlet 33 | javax.servlet-api 34 | 3.1.0 35 | provided 36 | 37 | 38 | javax.servlet 39 | jstl 40 | 1.2 41 | 42 | 43 | 44 | io.searchbox 45 | jest 46 | 0.1.7 47 | 48 | 49 | 50 | org.elasticsearch 51 | elasticsearch 52 | 1.7.1 53 | jar 54 | 55 | 56 | 57 | org.elasticsearch 58 | elasticsearch-shield 59 | 1.3.2 60 | 61 | 62 | org.slf4j 63 | slf4j-log4j12 64 | 1.6.1 65 | 66 | 67 | com.fasterxml.jackson.core 68 | jackson-databind 69 | 2.1.3 70 | 71 | 72 | junit 73 | junit 74 | 4.11 75 | 76 | 77 | 78 | us.codecraft 79 | webmagic-core 80 | 0.5.2 81 | 82 | 83 | us.codecraft 84 | webmagic-extension 85 | 0.5.2 86 | 87 | 88 | 89 | org.springframework 90 | spring-jdbc 91 | 4.2.1.RELEASE 92 | 93 | 94 | org.springframework 95 | spring-context-support 96 | 4.2.1.RELEASE 97 | 98 | 99 | mysql 100 | mysql-connector-java 101 | 5.1.31 102 | 103 | 104 
| 105 | org.springframework 106 | spring-webmvc 107 | 4.2.1.RELEASE 108 | 109 | 110 | 111 | 112 | 113 | maven-compiler-plugin 114 | 2.5.1 115 | true 116 | 117 | 1.7 118 | 1.7 119 | 120 | 121 | 122 | org.apache.tomcat.maven 123 | tomcat7-maven-plugin 124 | 2.2 125 | 126 | / 127 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/client/elasticsearch/NodeClient.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.client.elasticsearch; 2 | 3 | import org.elasticsearch.client.Client; 4 | import org.elasticsearch.common.settings.ImmutableSettings; 5 | import org.elasticsearch.node.Node; 6 | 7 | import static org.elasticsearch.node.NodeBuilder.nodeBuilder; 8 | 9 | /** 10 | * Example of obtaining a Client from an embedded node: getNodeClient() builds a Node via nodeBuilder() with clusterName "elasticsearch" and the setting http.enabled=false, then returns node.client(). NOTE(review): getNodeClient() is private and nothing in this class calls it, and the Node itself is not returned, so a caller has no handle to close it - confirm intent. Created by Administrator on 2015/9/9 0009. 11 | */ 12 | public class NodeClient { 13 | 14 | private static Client getNodeClient(){ 15 | Node node = nodeBuilder().clusterName("elasticsearch") 16 | .settings(ImmutableSettings.settingsBuilder().put("http.enabled", false)) 17 | .node(); 18 | Client client = node.client(); 19 | return client; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/client/elasticsearch/TransportClient.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.client.elasticsearch; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import net.aimeizi.model.Article; 6 | import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; 7 | import org.elasticsearch.action.admin.indices.delete.DeleteIndexResponse; 8 | import org.elasticsearch.action.admin.indices.flush.FlushRequest; 9 | import org.elasticsearch.action.admin.indices.flush.FlushResponse; 10 | import org.elasticsearch.action.admin.indices.optimize.OptimizeRequest; 11 | 
import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse; 12 | import org.elasticsearch.action.bulk.BulkItemResponse; 13 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 14 | import org.elasticsearch.action.bulk.BulkResponse; 15 | import org.elasticsearch.action.count.CountResponse; 16 | import org.elasticsearch.action.delete.DeleteResponse; 17 | import org.elasticsearch.action.get.GetResponse; 18 | import org.elasticsearch.action.index.IndexRequest; 19 | import org.elasticsearch.action.index.IndexResponse; 20 | import org.elasticsearch.action.search.MultiSearchResponse; 21 | import org.elasticsearch.action.search.SearchRequestBuilder; 22 | import org.elasticsearch.action.search.SearchResponse; 23 | import org.elasticsearch.action.search.SearchType; 24 | import org.elasticsearch.action.update.UpdateRequest; 25 | import org.elasticsearch.action.update.UpdateResponse; 26 | import org.elasticsearch.client.Client; 27 | import org.elasticsearch.common.settings.ImmutableSettings; 28 | import org.elasticsearch.common.settings.Settings; 29 | import org.elasticsearch.common.transport.InetSocketTransportAddress; 30 | import org.elasticsearch.common.unit.TimeValue; 31 | import org.elasticsearch.common.xcontent.XContentBuilder; 32 | import org.elasticsearch.index.query.*; 33 | import org.elasticsearch.script.ScriptService; 34 | import org.elasticsearch.search.SearchHit; 35 | import org.elasticsearch.search.SearchHits; 36 | import org.elasticsearch.search.highlight.HighlightField; 37 | import org.elasticsearch.search.suggest.Suggest.Suggestion; 38 | import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry; 39 | import org.elasticsearch.search.suggest.term.TermSuggestion; 40 | import org.elasticsearch.search.suggest.term.TermSuggestion.Entry.Option; 41 | import org.elasticsearch.search.suggest.term.TermSuggestionBuilder; 42 | 43 | import java.util.Calendar; 44 | import java.util.HashMap; 45 | import java.util.Map; 46 | import java.util.Set; 47 | 
48 | import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; 49 | import static org.elasticsearch.index.query.QueryBuilders.termQuery; 50 | 51 | /** 52 | * Examples that use the native Elasticsearch Transport Client. 53 | * http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/query-dsl-queries.html 54 | * @author welcome 55 | * 56 | */ 57 | public class TransportClient { 58 | 59 | /** 60 | * Creates a TransportClient with cluster.name "elasticsearch" and the transport address 127.0.0.1:9300. 61 | * @return the client, or null when construction threw (the exception is only printed, not rethrown) 62 | */ 63 | private static Client createTransportClient() { 64 | // build the settings 65 | Settings settings = ImmutableSettings.settingsBuilder() 66 | .put("cluster.name", "elasticsearch")// set the cluster name 67 | // .put("shield.user", "admin:sysadmin") 68 | .build(); 69 | Client client = null; 70 | try { 71 | client = new org.elasticsearch.client.transport.TransportClient(settings) 72 | .addTransportAddress(new InetSocketTransportAddress("127.0.0.1", 9300)); 73 | } catch (Exception e) { 74 | e.printStackTrace(); 75 | } 76 | return client; 77 | } 78 | 79 | public static void main(String[] args) throws Exception { 80 | 81 | // deleteIndices("article"); 82 | // deleteIndices("book"); 83 | // 84 | // creatJsonStringIndex(); 85 | // creatMapIndex(); 86 | // creatBeanIndex(); 87 | // useXContentBuilderCreatIndex(); 88 | 89 | // deleteIndex("book","book","1"); 90 | // deleteIndex("book","book","2"); 91 | // deleteIndex("book","book","3"); 92 | 93 | // updateIndexByDoc("book", "book", "AU-ytzQZ2hJMRScy9rds"); 94 | // updateIndexByScript("book", "book", "AU-ytzQZ2hJMRScy9rds"); 95 | 96 | // upsertIndex("book", "book", "1"); 97 | 98 | // bulkIndex(); 99 | 100 | // scrollSearchDelete("book", "desc", "语言"); 101 | 102 | // deleteIndex("article","article","1"); 103 | // deleteIndex("article","article","2"); 104 | // deleteIndex("article","article","3"); 105 | 106 | // deleteIndices("article"); 107 | 108 | // getIndex("book","book","1"); 109 | // getIndex("book","book","2"); 110 | // getIndex("book","book","3"); 111 | // getIndex("article","article","1"); 112 | // 113 | 
// querySearch("book", "book", "desc", "方面"); 114 | // querySearch("book", "book", "desc", "语言"); 115 | // querySearch("book", "book", "desc", "设计"); 116 | // querySearch("article", "article", "content", "圆圆"); 117 | // 118 | // querySearch("book", "book", "desc", "浅出"); 119 | // querySearch("book", "book", "desc", "语言"); 120 | // querySearch("book", "book", "desc", "编程"); 121 | // querySearch("article", "article", "content", "性虐"); 122 | 123 | // multiSearch("圆圆"); 124 | // 125 | // count("book","desc","编程"); 126 | // 127 | // matchQuery("book","desc","编程"); 128 | // 129 | // booleanQuery(); 130 | // 131 | // fuzzyLikeQuery(); 132 | // 133 | // fuzzyQuery(); 134 | // 135 | // matchAllQuery(); 136 | // 137 | // prefixQuery(); 138 | // 139 | // queryString(); 140 | // 141 | // rangeQuery(); 142 | // 143 | // termsQuery(); 144 | // 145 | // wildcardQuery(); 146 | // 147 | // indicesQuery(); 148 | // 149 | // regexpQuery(); 150 | 151 | // suggest(); 152 | } 153 | 154 | 155 | /** 156 | * Indexes one document: sends sourcecontent to the given index and type (no document id is supplied) and prints the response via printIndexInfo. 157 | * @param index index name 158 | * @param type index type 159 | * @param sourcecontent the content to index, passed to setSource as a String 160 | */ 161 | public static void createIndex(String index,String type,String sourcecontent) { 162 | Client client = createTransportClient(); 163 | IndexResponse response = client.prepareIndex(index, type).setSource(sourcecontent).execute().actionGet(); 164 | printIndexInfo(response); 165 | } 166 | 167 | /** 168 | * bulkIndex 169 | * @throws Exception 170 | */ 171 | private static void bulkIndex() throws Exception{ 172 | Client client = createTransportClient(); 173 | BulkRequestBuilder bulkRequest = client.prepareBulk(); 174 | bulkRequest.add(client.prepareIndex("book", "book", "3") 175 | .setSource(jsonBuilder() 176 | .startObject() 177 | .field("name", "Docker开发实践") 178 | .field("author", "曾金龙 肖新华 刘清") 179 | .field("pubinfo", "人民邮电出版社") 180 | .field("pubtime", "2015-07-01") 181 | .field("desc", 
"《Docker开发实践》由浅入深地介绍了Docker的实践之道,首先讲解Docker的概念、容器和镜像的相关操作、容器的数据管理等内容,接着通过不同类型的应用说明Docker的实际应用,然后介绍了网络、安全、API、管理工具Fig、Kubernetes、shipyard以及Docker三件套(Machine+Swarm+Compose)等,最后列举了常见镜像、Docker API等内容。") 182 | .endObject() 183 | ) 184 | ); 185 | 186 | bulkRequest.add(client.prepareIndex("book", "book", "4") 187 | .setSource(jsonBuilder() 188 | .startObject() 189 | .field("name", "图灵程序设计丛书:Hadoop基础教程") 190 | .field("author", "张治起") 191 | .field("pubinfo", "人民邮电出版社") 192 | .field("pubtime", "2014-01-01") 193 | .field("desc", "《图灵程序设计丛书:Hadoop基础教程》包括三个主要部分:第1~5章讲述了Hadoop的核心机制及Hadoop的工作模式;第6~7章涵盖了Hadoop更多可操作的内容;第8~11章介绍了Hadoop与其他产品和技术的组合使用。《图灵程序设计丛书:Hadoop基础教程》目的在于帮助读者了解什么是Hadoop,Hadoop是如何工作的,以及如何使用Hadoop从数据中提取有价值的信息,并用它解决大数据问题") 194 | .endObject() 195 | ) 196 | ); 197 | 198 | BulkResponse bulkResponse = bulkRequest.execute().actionGet(); 199 | if (bulkResponse.hasFailures()) { 200 | BulkItemResponse[] bulkItemResponse = bulkResponse.getItems(); 201 | for (int i = 0; i ") 451 | .setHighlighterPostTags("") 452 | // 设置查询数据的位置,分页用 453 | .setFrom(0) 454 | // 设置查询结果集的最大条数 455 | .setSize(60) 456 | // 设置是否按查询匹配度排序 457 | .setExplain(true) 458 | // 最后就是返回搜索响应信息 459 | .execute() 460 | .actionGet(); 461 | SearchHits searchHits = response.getHits(); 462 | System.out.println("-----------------在["+term+"]中搜索关键字["+queryString+"]---------------------"); 463 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 464 | SearchHit[] hits = searchHits.getHits(); 465 | for (SearchHit searchHit : hits) { 466 | //获取高亮的字段 467 | Map highlightFields = searchHit.getHighlightFields(); 468 | HighlightField highlightField = highlightFields.get(term); 469 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 470 | Map sourceAsMap = searchHit.sourceAsMap(); 471 | Set keySet = sourceAsMap.keySet(); 472 | for (String string : keySet) { 473 | System.out.println(string+":"+sourceAsMap.get(string)); 474 | } 475 | System.out.println(); 476 | } 477 
| } 478 | 479 | /** 480 | * Scroll search: runs a term query on one field and pages through the results with the scroll API, printing each hit's highlight and source fields until a page comes back empty. 481 | * @param index index name passed to prepareSearch 482 | * @param type document type passed to setTypes 483 | * @param term field name used for both the term query and the highlight 484 | * @param queryString term value to search for 485 | */ 486 | private static void scrollSearch(String index, String type,String term,String queryString){ 487 | Client client = createTransportClient(); 488 | SearchResponse scrollResp = client.prepareSearch(index) 489 | .setScroll(new TimeValue(60000)) 490 | .setTypes(type) 491 | // Set the search type 492 | // 1. SearchType.DFS_QUERY_THEN_FETCH = precise query (original author's description) 493 | // 2. SearchType.SCAN = scan query, unordered 494 | // 3. SearchType.COUNT = original note says this is the default when unset - NOTE(review): verify against the Elasticsearch docs 495 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 496 | // Set the query keyword 497 | .setQuery(termQuery(term, queryString)) // Query 498 | //.setPostFilter(FilterBuilders.rangeFilter("age").from(12).to(18)) // Filter 499 | .addHighlightedField(term) 500 | .setHighlighterPreTags("") 501 | .setHighlighterPostTags("") 502 | .setSize(100).execute().actionGet(); //100 hits per shard will be returned for each scroll 503 | while (true) { 504 | for (SearchHit searchHit : scrollResp.getHits()) { 505 | // Fetch the highlighted field. NOTE(review): the result of get(term) is dereferenced without a null check 506 | Map highlightFields = searchHit.getHighlightFields(); 507 | HighlightField highlightField = highlightFields.get(term); 508 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 509 | Map sourceAsMap = searchHit.sourceAsMap(); 510 | Set keySet = sourceAsMap.keySet(); 511 | for (String string : keySet) { 512 | System.out.println(string+":"+sourceAsMap.get(string)); 513 | } 514 | System.out.println(); 515 | } 516 | scrollResp = client.prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet(); 517 | if (scrollResp.getHits().getHits().length == 0) { 518 | break; 519 | } 520 | } 521 | } 522 | 523 | 524 | /** 525 | * 526 | * @param queryString 527 | */ 528 | private static void multiSearch(String queryString){ 529 | Client client = createTransportClient(); 530 | SearchRequestBuilder srb1 = client.prepareSearch() 531 | 
.setQuery(QueryBuilders.queryStringQuery(queryString)); 532 | 533 | SearchRequestBuilder srb2 = client.prepareSearch() 534 | .setQuery(QueryBuilders.matchQuery("desc", queryString)); 535 | 536 | MultiSearchResponse sr = client.prepareMultiSearch() 537 | .add(srb1) 538 | .add(srb2) 539 | .execute().actionGet(); 540 | 541 | long nbHits = 0; 542 | for (MultiSearchResponse.Item item : sr.getResponses()) { 543 | SearchResponse response = item.getResponse(); 544 | nbHits += response.getHits().getTotalHits(); 545 | System.out.println("本次查询共匹配到:"+nbHits+"记录"); 546 | SearchHits searchHits = response.getHits(); 547 | System.out.println("-----------------搜索关键字为:["+queryString+"]---------------------"); 548 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 549 | SearchHit[] hits = searchHits.getHits(); 550 | for (SearchHit searchHit : hits) { 551 | Map sourceAsMap = searchHit.sourceAsMap(); 552 | Set keySet = sourceAsMap.keySet(); 553 | for (String string : keySet) { 554 | System.out.println(string+":"+sourceAsMap.get(string)); 555 | } 556 | System.out.println(); 557 | } 558 | } 559 | } 560 | 561 | /** 562 | * Runs a count request with a term query and prints the number of matching documents. 563 | * @param indices index name passed to prepareCount 564 | * @param field field the term query targets 565 | * @param queryString term value to match 566 | */ 567 | private static void count(String indices,String field,String queryString){ 568 | Client client = createTransportClient(); 569 | CountResponse response = client.prepareCount(indices) 570 | .setQuery(termQuery(field, queryString)) 571 | .execute() 572 | .actionGet(); 573 | long count = response.getCount(); 574 | System.out.println("在文档"+indices+"中搜索字段"+field+"查询关键字:"+queryString+"共匹配到"+count+"条记录!"); 575 | } 576 | 577 | private static void suggest(){ 578 | Client client = createTransportClient(); 579 | 580 | // CompletionSuggestionBuilder completionSuggestion = new CompletionSuggestionBuilder("suggestions"); 581 | // completionSuggestion.field("text"); 582 | // completionSuggestion.text("园"); 583 | // completionSuggestion.size(10); 584 | // 585 | // SuggestRequestBuilder 
suggestRequestBuilder = client.prepareSuggest("article"); 586 | // suggestRequestBuilder.addSuggestion(completionSuggestion); 587 | // SuggestResponse suggestResponse = suggestRequestBuilder.execute().actionGet(); 588 | // 589 | // Suggestion> suggestion = suggestResponse.getSuggest().getSuggestion("suggestions"); 590 | // for(Entry entry:suggestion){ 591 | // for (Option option : entry) { 592 | // System.out.println(option.getText().string()); 593 | // } 594 | // } 595 | 596 | TermSuggestionBuilder termSuggestionBuilder = new TermSuggestionBuilder("suggest"); 597 | termSuggestionBuilder.text("编程"); 598 | termSuggestionBuilder.field("desc"); 599 | TermSuggestion termSuggestion = client.prepareSuggest("book") 600 | .addSuggestion(termSuggestionBuilder) 601 | .execute() 602 | .actionGet() 603 | .getSuggest() 604 | .getSuggestion("suggest"); 605 | Suggestion> suggestion = termSuggestion; 606 | for(Entry entry:suggestion){ 607 | for (Option option : entry) { 608 | System.out.println(option.getText().string()); 609 | } 610 | } 611 | 612 | } 613 | 614 | 615 | private static void regexpQuery() { 616 | Client client = createTransportClient(); 617 | RegexpQueryBuilder regexpQuery = QueryBuilders.regexpQuery("content", "健健|康康|圆圆|平平|安安|女神"); 618 | SearchResponse searchResponse = client.prepareSearch("article") 619 | .setQuery(regexpQuery) 620 | .addHighlightedField("content") 621 | .setHighlighterPreTags("") 622 | .setHighlighterPostTags("") 623 | .setHighlighterFragmentSize(250)//设置高亮内容长度 624 | // 设置查询数据的位置,分页用 625 | .setFrom(0) 626 | // 设置查询结果集的最大条数 627 | .setSize(60) 628 | // 设置是否按查询匹配度排序 629 | .setExplain(true) 630 | .execute() 631 | .actionGet(); 632 | SearchHits searchHits = searchResponse.getHits(); 633 | System.out.println("-----------------regexpQuery---------------------"); 634 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 635 | SearchHit[] hits = searchHits.getHits(); 636 | for (SearchHit searchHit : hits) { 637 | //获取高亮的字段 638 | Map 
highlightFields = searchHit.getHighlightFields(); 639 | HighlightField highlightField = highlightFields.get("content"); 640 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 641 | Map sourceAsMap = searchHit.sourceAsMap(); 642 | Set keySet = sourceAsMap.keySet(); 643 | for (String string : keySet) { 644 | System.out.println(string+":"+sourceAsMap.get(string)); 645 | } 646 | System.out.println(); 647 | } 648 | } 649 | 650 | /** Searches index "article" with an indices query that wraps a term query on "content" and lists the indices news, article and book; highlights "content" and prints the total plus each hit's highlight and source fields. */ 651 | private static void indicesQuery() { 652 | Client client = createTransportClient(); 653 | IndicesQueryBuilder indicesQuery = QueryBuilders.indicesQuery(termQuery("content", "性虐"), "news","article","book"); 654 | SearchResponse searchResponse = client.prepareSearch("article") 655 | .setQuery(indicesQuery) 656 | .addHighlightedField("content") 657 | .setHighlighterPreTags("") 658 | .setHighlighterPostTags("") 659 | .setHighlighterFragmentSize(250)// highlight fragment size 660 | // Offset of the first hit to return (used for paging) 661 | .setFrom(0) 662 | // Maximum number of hits to return 663 | .setSize(60) 664 | // Original note: whether to sort by match score. NOTE(review): setExplain looks like it requests a per-hit score explanation rather than sorting - confirm 665 | .setExplain(true) 666 | .execute() 667 | .actionGet(); 668 | SearchHits searchHits = searchResponse.getHits(); 669 | System.out.println("-----------------indicesQuery---------------------"); 670 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 671 | SearchHit[] hits = searchHits.getHits(); 672 | for (SearchHit searchHit : hits) { 673 | // Fetch the highlighted field. NOTE(review): the result of get("content") is dereferenced without a null check 674 | Map highlightFields = searchHit.getHighlightFields(); 675 | HighlightField highlightField = highlightFields.get("content"); 676 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 677 | Map sourceAsMap = searchHit.sourceAsMap(); 678 | Set keySet = sourceAsMap.keySet(); 679 | for (String string : keySet) { 680 | System.out.println(string+":"+sourceAsMap.get(string)); 681 | } 682 | System.out.println(); 683 | } 684 | } 685 | 686 | 687 | private static void wildcardQuery() { 688 | Client client = 
createTransportClient(); 689 | WildcardQueryBuilder wildcardQuery = QueryBuilders.wildcardQuery("content", "S?"); 690 | SearchResponse searchResponse = client.prepareSearch("article") 691 | .setQuery(wildcardQuery) 692 | .addHighlightedField("content") 693 | .setHighlighterPreTags("") 694 | .setHighlighterPostTags("") 695 | .setHighlighterFragmentSize(250)//设置高亮内容长度 696 | // 设置查询数据的位置,分页用 697 | .setFrom(0) 698 | // 设置查询结果集的最大条数 699 | .setSize(60) 700 | // 设置是否按查询匹配度排序 701 | .setExplain(true) 702 | .execute() 703 | .actionGet(); 704 | SearchHits searchHits = searchResponse.getHits(); 705 | System.out.println("-----------------wildcardQuery---------------------"); 706 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 707 | SearchHit[] hits = searchHits.getHits(); 708 | for (SearchHit searchHit : hits) { 709 | //获取高亮的字段 710 | Map highlightFields = searchHit.getHighlightFields(); 711 | HighlightField highlightField = highlightFields.get("content"); 712 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 713 | Map sourceAsMap = searchHit.sourceAsMap(); 714 | Set keySet = sourceAsMap.keySet(); 715 | for (String string : keySet) { 716 | System.out.println(string+":"+sourceAsMap.get(string)); 717 | } 718 | System.out.println(); 719 | } 720 | } 721 | 722 | 723 | private static void termsQuery() { 724 | Client client = createTransportClient(); 725 | TermsQueryBuilder termsQueryBuilder = QueryBuilders.termsQuery("content", "家暴","澄清","帮助","女神","性虐"); 726 | SearchResponse searchResponse = client.prepareSearch("article") 727 | .setQuery(termsQueryBuilder) 728 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 729 | .addHighlightedField("content") 730 | .setHighlighterPreTags("") 731 | .setHighlighterPostTags("") 732 | .setHighlighterFragmentSize(250)//设置高亮内容长度 733 | // 设置查询数据的位置,分页用 734 | .setFrom(0) 735 | // 设置查询结果集的最大条数 736 | .setSize(60) 737 | // 设置是否按查询匹配度排序 738 | .setExplain(true) 739 | .execute() 740 | 
.actionGet(); 741 | SearchHits searchHits = searchResponse.getHits(); 742 | System.out.println("-----------------termsQuery---------------------"); 743 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 744 | SearchHit[] hits = searchHits.getHits(); 745 | for (SearchHit searchHit : hits) { 746 | //获取高亮的字段 747 | Map highlightFields = searchHit.getHighlightFields(); 748 | HighlightField highlightField = highlightFields.get("content"); 749 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 750 | Map sourceAsMap = searchHit.sourceAsMap(); 751 | Set keySet = sourceAsMap.keySet(); 752 | for (String string : keySet) { 753 | System.out.println(string+":"+sourceAsMap.get(string)); 754 | } 755 | System.out.println(); 756 | } 757 | } 758 | 759 | 760 | private static void rangeQuery() { 761 | Client client = createTransportClient(); 762 | RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("id") 763 | .from(5) 764 | .to(25) 765 | .includeLower(true) 766 | .includeUpper(false);//rangeQuery 查询id在5到24之间的内容 767 | SearchResponse searchResponse = client.prepareSearch("article") 768 | .setQuery(rangeQuery) 769 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 770 | .addHighlightedField("content") 771 | .setHighlighterPreTags("") 772 | .setHighlighterPostTags("") 773 | .setHighlighterFragmentSize(250)//设置高亮内容长度 774 | // 设置查询数据的位置,分页用 775 | .setFrom(0) 776 | // 设置查询结果集的最大条数 777 | .setSize(60) 778 | // 设置是否按查询匹配度排序 779 | .setExplain(true) 780 | .execute() 781 | .actionGet(); 782 | SearchHits searchHits = searchResponse.getHits(); 783 | System.out.println("-----------------rangeQuery---------------------"); 784 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 785 | SearchHit[] hits = searchHits.getHits(); 786 | for (SearchHit searchHit : hits) { 787 | //获取高亮的字段 788 | Map highlightFields = searchHit.getHighlightFields(); 789 | HighlightField highlightField = highlightFields.get("content"); 790 | 
System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 791 | Map sourceAsMap = searchHit.sourceAsMap(); 792 | Set keySet = sourceAsMap.keySet(); 793 | for (String string : keySet) { 794 | System.out.println(string+":"+sourceAsMap.get(string)); 795 | } 796 | System.out.println(); 797 | } 798 | } 799 | 800 | /** Searches index "article" with a query-string query, highlights "content", and prints the total plus each hit's highlight and source fields. */ 801 | private static void queryString() { 802 | Client client = createTransportClient(); 803 | SearchResponse searchResponse = client.prepareSearch("article") 804 | .setQuery(QueryBuilders.queryStringQuery("女神 高圆圆 淤青 伤痕 床头")) 805 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 806 | .addHighlightedField("content") 807 | .setHighlighterPreTags("") 808 | .setHighlighterPostTags("") 809 | .setHighlighterFragmentSize(250)// highlight fragment size 810 | // Offset of the first hit to return (used for paging) 811 | .setFrom(0) 812 | // Maximum number of hits to return 813 | .setSize(60) 814 | // Original note: whether to sort by match score. NOTE(review): setExplain looks like it requests a per-hit score explanation rather than sorting - confirm 815 | .setExplain(true) 816 | .execute() 817 | .actionGet(); 818 | SearchHits searchHits = searchResponse.getHits(); 819 | System.out.println("-----------------queryString---------------------"); 820 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 821 | SearchHit[] hits = searchHits.getHits(); 822 | for (SearchHit searchHit : hits) { 823 | // Fetch the highlighted field. NOTE(review): the result of get("content") is dereferenced without a null check 824 | Map highlightFields = searchHit.getHighlightFields(); 825 | HighlightField highlightField = highlightFields.get("content"); 826 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 827 | Map sourceAsMap = searchHit.sourceAsMap(); 828 | Set keySet = sourceAsMap.keySet(); 829 | for (String string : keySet) { 830 | System.out.println(string+":"+sourceAsMap.get(string)); 831 | } 832 | System.out.println(); 833 | } 834 | } 835 | 836 | 837 | private static void prefixQuery() { 838 | Client client = createTransportClient(); 839 | SearchResponse searchResponse = client.prepareSearch("article") 840 | .setQuery(QueryBuilders.prefixQuery("content","母亲")) 841 | 
.setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 842 | .addHighlightedField("content") 843 | .setHighlighterPreTags("") 844 | .setHighlighterPostTags("") 845 | // 设置查询数据的位置,分页用 846 | .setFrom(0) 847 | // 设置查询结果集的最大条数 848 | .setSize(60) 849 | // 设置是否按查询匹配度排序 850 | .setExplain(true) 851 | .execute() 852 | .actionGet(); 853 | SearchHits searchHits = searchResponse.getHits(); 854 | System.out.println("-----------------prefixQuery---------------------"); 855 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 856 | SearchHit[] hits = searchHits.getHits(); 857 | for (SearchHit searchHit : hits) { 858 | //获取高亮的字段 859 | Map highlightFields = searchHit.getHighlightFields(); 860 | HighlightField highlightField = highlightFields.get("content"); 861 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 862 | Map sourceAsMap = searchHit.sourceAsMap(); 863 | Set keySet = sourceAsMap.keySet(); 864 | for (String string : keySet) { 865 | System.out.println(string+":"+sourceAsMap.get(string)); 866 | } 867 | System.out.println(); 868 | } 869 | } 870 | 871 | 872 | private static void matchAllQuery() { 873 | Client client = createTransportClient(); 874 | SearchResponse searchResponse = client.prepareSearch("book") 875 | .setQuery(QueryBuilders.matchAllQuery()) 876 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 877 | // 设置查询数据的位置,分页用 878 | .setFrom(0) 879 | // 设置查询结果集的最大条数 880 | .setSize(60) 881 | // 设置是否按查询匹配度排序 882 | .setExplain(true) 883 | .execute() 884 | .actionGet(); 885 | SearchHits searchHits = searchResponse.getHits(); 886 | System.out.println("-----------------matchAllQuery---------------------"); 887 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 888 | SearchHit[] hits = searchHits.getHits(); 889 | for (SearchHit searchHit : hits) { 890 | Map sourceAsMap = searchHit.sourceAsMap(); 891 | Set keySet = sourceAsMap.keySet(); 892 | for (String string : keySet) { 893 | 
System.out.println(string+":"+sourceAsMap.get(string)); 894 | } 895 | System.out.println(); 896 | } 897 | } 898 | 899 | /** Searches index "article" with a fuzzy query on "content", highlights "content", and prints the total plus each hit's highlight and source fields. */ 900 | private static void fuzzyQuery() { 901 | Client client = createTransportClient(); 902 | SearchResponse searchResponse = client.prepareSearch("article") 903 | .setQuery(QueryBuilders.fuzzyQuery("content","床头")) 904 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 905 | .addHighlightedField("content") 906 | .setHighlighterPreTags("") 907 | .setHighlighterPostTags("") 908 | // Offset of the first hit to return (used for paging) 909 | .setFrom(0) 910 | // Maximum number of hits to return 911 | .setSize(60) 912 | // Original note: whether to sort by match score. NOTE(review): setExplain looks like it requests a per-hit score explanation rather than sorting - confirm 913 | .setExplain(true) 914 | .execute() 915 | .actionGet(); 916 | SearchHits searchHits = searchResponse.getHits(); 917 | System.out.println("-----------------fuzzyQuery---------------------"); 918 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 919 | SearchHit[] hits = searchHits.getHits(); 920 | for (SearchHit searchHit : hits) { 921 | // Fetch the highlighted field. NOTE(review): the result of get("content") is dereferenced without a null check 922 | Map highlightFields = searchHit.getHighlightFields(); 923 | HighlightField highlightField = highlightFields.get("content"); 924 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 925 | Map sourceAsMap = searchHit.sourceAsMap(); 926 | Set keySet = sourceAsMap.keySet(); 927 | for (String string : keySet) { 928 | System.out.println(string+":"+sourceAsMap.get(string)); 929 | } 930 | System.out.println(); 931 | } 932 | } 933 | 934 | @SuppressWarnings("deprecation") 935 | private static void fuzzyLikeQuery() { 936 | Client client = createTransportClient(); 937 | SearchResponse searchResponse = client.prepareSearch("article") 938 | .setQuery(QueryBuilders.fuzzyLikeThisQuery("content").likeText("我 要 到 床头 去")) 939 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 940 | .addHighlightedField("content") 941 | .setHighlighterPreTags("") 942 | .setHighlighterPostTags("") 943 | // Offset of the first hit to return (used for paging) 944 | .setFrom(0) 945 | // Maximum number of hits to return 946 | .setSize(60) 947 | // 
设置是否按查询匹配度排序 948 | .setExplain(true) 949 | .execute() 950 | .actionGet(); 951 | SearchHits searchHits = searchResponse.getHits(); 952 | System.out.println("-----------------fuzzyLikeQuery---------------------"); 953 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 954 | SearchHit[] hits = searchHits.getHits(); 955 | for (SearchHit searchHit : hits) { 956 | //获取高亮的字段 957 | Map highlightFields = searchHit.getHighlightFields(); 958 | HighlightField highlightField = highlightFields.get("content"); 959 | System.out.println("高亮字段:"+highlightField.getName()+"\n高亮部分内容:"+highlightField.getFragments()[0].string()); 960 | Map sourceAsMap = searchHit.sourceAsMap(); 961 | Set keySet = sourceAsMap.keySet(); 962 | for (String string : keySet) { 963 | System.out.println(string+":"+sourceAsMap.get(string)); 964 | } 965 | System.out.println(); 966 | } 967 | } 968 | 969 | 970 | /** 971 | * boolean query 972 | */ 973 | private static void booleanQuery() { 974 | Client client = createTransportClient(); 975 | QueryBuilder queryBuilder = QueryBuilders 976 | .boolQuery() 977 | .must(termQuery("desc", "结构")) 978 | .must(termQuery("name", "深入")) 979 | .mustNot(termQuery("desc", "性虐")) 980 | .should(termQuery("desc", "GoWeb")); 981 | SearchResponse searchResponse = client.prepareSearch("book") 982 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 983 | .addHighlightedField("desc") 984 | .addHighlightedField("name") 985 | .setHighlighterPreTags("") 986 | .setHighlighterPostTags("") 987 | // 设置查询数据的位置,分页用 988 | .setFrom(0) 989 | // 设置查询结果集的最大条数 990 | .setSize(60) 991 | // 设置是否按查询匹配度排序 992 | .setExplain(true) 993 | .setQuery(queryBuilder) 994 | .execute() 995 | .actionGet(); 996 | SearchHits searchHits = searchResponse.getHits(); 997 | System.out.println("-----------------boolQuery---------------------"); 998 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 999 | SearchHit[] hits = searchHits.getHits(); 1000 | for (SearchHit searchHit : hits) { 1001 | //获取高亮的字段 
1002 | Map highlightFields = searchHit.getHighlightFields(); 1003 | HighlightField deschighlightField = highlightFields.get("desc"); 1004 | System.out.println("高亮字段:"+deschighlightField.getName()+"\n高亮部分内容:"+deschighlightField.getFragments()[0].string()); 1005 | HighlightField namehighlightField = highlightFields.get("name"); 1006 | System.out.println("高亮字段:"+namehighlightField.getName()+"\n高亮部分内容:"+namehighlightField.getFragments()[0].string()); 1007 | Map sourceAsMap = searchHit.sourceAsMap(); 1008 | Set keySet = sourceAsMap.keySet(); 1009 | for (String string : keySet) { 1010 | System.out.println(string+":"+sourceAsMap.get(string)); 1011 | } 1012 | System.out.println(); 1013 | } 1014 | } 1015 | 1016 | 1017 | /** 1018 | * Runs a match query on one field and prints the total hit count followed by every source field of each returned hit. 1019 | * @param indices index name passed to prepareSearch 1020 | * @param field field the match query targets 1021 | * @param queryString text to match 1022 | */ 1023 | private static void matchQuery(String indices,String field,String queryString){ 1024 | Client client = createTransportClient(); 1025 | SearchResponse searchResponse = client.prepareSearch(indices) 1026 | .setQuery(QueryBuilders.matchQuery(field, queryString)) 1027 | .execute() 1028 | .actionGet(); 1029 | SearchHits searchHits = searchResponse.getHits(); 1030 | System.out.println("---------------matchquery--在["+field+"]中搜索关键字["+queryString+"]---------------------"); 1031 | System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); 1032 | SearchHit[] hits = searchHits.getHits(); 1033 | for (SearchHit searchHit : hits) { 1034 | Map sourceAsMap = searchHit.sourceAsMap(); 1035 | Set keySet = sourceAsMap.keySet(); 1036 | for (String string : keySet) { 1037 | System.out.println(string+":"+sourceAsMap.get(string)); 1038 | } 1039 | System.out.println(); 1040 | } 1041 | } 1042 | 1043 | 1044 | /** 1045 | * Indexes a document supplied as a JSON string 1046 | */ 1047 | private static void creatJsonStringIndex() { 1048 | String json = "{" + 1049 | "\"name\":\"深入浅出Node.js\"," + 1050 | "\"author\":\"朴灵 \"," + 1051 | "\"pubinfo\":\"人民邮电出版社 \"," + 1052 | "\"pubtime\":\"2013-12-1 \"," + 1053 | 
"\"desc\":\"本书从不同的视角介绍了 Node 内在的特点和结构。由首章Node 介绍为索引,涉及Node 的各个方面,主要内容包含模块机制的揭示、异步I/O 实现原理的展现、异步编程的探讨、内存控制的介绍、二进制数据Buffer 的细节、Node 中的网络编程基础、Node 中的Web 开发、进程间的消息传递、Node 测试以及通过Node 构建产品需要的注意事项。最后的附录介绍了Node 的安装、调试、编码规范和NPM 仓库等事宜。本书适合想深入了解 Node 的人员阅读。\"" + 1054 | "}"; 1055 | createIndex("book","book",json); 1056 | } 1057 | 1058 | /** 1059 | * 创建Map类型的索引 1060 | */ 1061 | private static void creatMapIndex() { 1062 | Map json = new HashMap(); 1063 | json.put("name","Go Web编程"); 1064 | json.put("author","谢孟军 "); 1065 | json.put("pubinfo","电子工业出版社"); 1066 | json.put("pubtime","2013-6"); 1067 | json.put("desc","《Go Web编程》介绍如何使用Go语言编写Web,包含了Go语言的入门、Web相关的一些知识、Go中如何处理Web的各方面设计(表单、session、cookie等)、数据库以及如何编写GoWeb应用等相关知识。通过《Go Web编程》的学习能够让读者了解Go的运行机制,如何用Go编写Web应用,以及Go的应用程序的部署和维护等,让读者对整个的Go的开发了如指掌。"); 1068 | Client client = createTransportClient(); 1069 | IndexResponse response = client.prepareIndex("book", "book").setSource(json).execute().actionGet(); 1070 | printIndexInfo(response); 1071 | } 1072 | 1073 | /** 1074 | * 打印索引信息 1075 | * @param response 1076 | */ 1077 | private static void printIndexInfo(IndexResponse response) { 1078 | System.out.println("****************index ***********************"); 1079 | // Index name 1080 | String _index = response.getIndex(); 1081 | // Type name 1082 | String _type = response.getType(); 1083 | // Document ID (generated or not) 1084 | String _id = response.getId(); 1085 | // Version (if it's the first time you index this document, you will get: 1) 1086 | long _version = response.getVersion(); 1087 | System.out.println(_index+","+_type+","+_id+","+_version); 1088 | } 1089 | 1090 | /** 1091 | * 序列化Bean的方式创建索引 1092 | * @throws JsonProcessingException 1093 | */ 1094 | private static void creatBeanIndex() throws JsonProcessingException { 1095 | ObjectMapper mapper = new ObjectMapper(); 1096 | Article article = new Article(1,"高圆圆身上淤青 遭家暴还是玩SM遭性虐?","近日,有媒体拍到高圆圆身上的淤青,腿上还有两块伤疤,引起不少人猜测是遭受家暴。" + 1097 | "对于遭到家暴的传闻,高圆圆首次作出澄清,称这是因为照顾母亲而留下的伤痕,她跟赵又廷关系好得很。" 
+ 1098 | "照顾母亲竟然会留下伤痕?究竟是怎么照顾的。" + 1099 | "高圆圆称,“我妈当时住院,她翻身是需要旁人帮助的,我要到床头去,抱起她的上臂,然后她的脚一蹬,这样才能翻过来。" + 1100 | "但我们两个的力气都不够,每次一用力的时候,我的大腿就会刚好撞在那个床框上,所以大腿上就撞出那两块淤青了。" + 1101 | "事情真的这么简单吗?即使稍微一撞,也不至于淤青吧!" + 1102 | "看到那个伤疤以及淤青的皮肤,不得不让人怀疑高圆圆是遭受家暴。" + 1103 | "当然,还有另外一个原因,就是玩SM遭性虐。" + 1104 | "当然,这么变态的事情,相信女神不会做的。" + 1105 | "是照顾母亲留下的伤痕也好,遭受家暴也好,希望女神高圆圆以后都能平平安安健健康康吧!", "http://www.vdfly.com/star/20141119/37968.html", Calendar.getInstance().getTime(), "青春娱乐网", "匿名"); 1106 | String json = mapper.writeValueAsString(article); 1107 | createIndex("article","article",json); 1108 | } 1109 | 1110 | 1111 | /** 1112 | * 使用Elasticsearch XContentBuilder 创建索引 1113 | * @throws Exception 1114 | */ 1115 | private static void useXContentBuilderCreatIndex() throws Exception { 1116 | XContentBuilder builder = jsonBuilder() 1117 | .startObject() 1118 | .field("name","Go Web编程") 1119 | .field("author","谢孟军 ") 1120 | .field("pubinfo","电子工业出版社") 1121 | .field("pubtime","2013-6") 1122 | .field("desc","《Go Web编程》介绍如何使用Go语言编写Web,包含了Go语言的入门、Web相关的一些知识、Go中如何处理Web的各方面设计(表单、session、cookie等)、数据库以及如何编写GoWeb应用等相关知识。通过《Go Web编程》的学习能够让读者了解Go的运行机制,如何用Go编写Web应用,以及Go的应用程序的部署和维护等,让读者对整个的Go的开发了如指掌。") 1123 | .endObject(); 1124 | String jsonstring = builder.string(); 1125 | createIndex("book","book",jsonstring); 1126 | } 1127 | 1128 | 1129 | } 1130 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/client/jest/JestExample.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.client.jest; 2 | 3 | import com.google.gson.GsonBuilder; 4 | import io.searchbox.client.JestClient; 5 | import io.searchbox.client.JestClientFactory; 6 | import io.searchbox.client.JestResult; 7 | import io.searchbox.client.config.HttpClientConfig; 8 | import io.searchbox.cluster.Health; 9 | import io.searchbox.cluster.NodesInfo; 10 | import io.searchbox.cluster.NodesStats; 11 | import io.searchbox.core.*; 12 | import 
io.searchbox.core.SearchResult.Hit; 13 | import io.searchbox.indices.*; 14 | import net.aimeizi.model.Article; 15 | import org.elasticsearch.index.query.QueryBuilders; 16 | import org.elasticsearch.search.builder.SearchSourceBuilder; 17 | import org.elasticsearch.search.highlight.HighlightBuilder; 18 | 19 | import java.text.SimpleDateFormat; 20 | import java.util.*; 21 | 22 | public class JestExample { 23 | 24 | public static void main(String[] args) throws Exception { 25 | // deleteIndex(); 26 | // createIndex(); 27 | // bulkIndex(); 28 | createSearch("性虐"); 29 | searchAll(); 30 | // getDocument("article", "article", "1"); 31 | // getDocument("article","article","2"); 32 | // getDocument("article","article","3"); 33 | // updateDocument("article", "article", "3"); 34 | // getDocument("article", "article", "3"); 35 | // deleteDocument("article","article","1"); 36 | // nodesStats(); 37 | // health(); 38 | // nodesInfo(); 39 | // indicesExists(); 40 | // flush(); 41 | // optimize(); 42 | // closeIndex(); 43 | // clearCache(); 44 | } 45 | 46 | 47 | /** 48 | * 将删除所有的索引 49 | * @throws Exception 50 | */ 51 | private static void deleteIndex() throws Exception { 52 | JestClient jestClient = JestExample.getJestClient(); 53 | DeleteIndex deleteIndex = new DeleteIndex.Builder("article").build(); 54 | JestResult result = jestClient.execute(deleteIndex); 55 | System.out.println(result.getJsonString()); 56 | } 57 | 58 | 59 | 60 | /** 61 | * 清缓存 62 | * @throws Exception 63 | */ 64 | private static void clearCache() throws Exception { 65 | JestClient jestClient = JestExample.getJestClient(); 66 | ClearCache closeIndex = new ClearCache.Builder().build(); 67 | JestResult result = jestClient.execute(closeIndex); 68 | System.out.println(result.getJsonString()); 69 | } 70 | 71 | 72 | 73 | /** 74 | * 关闭索引 75 | * @throws Exception 76 | */ 77 | private static void closeIndex() throws Exception { 78 | JestClient jestClient = JestExample.getJestClient(); 79 | CloseIndex closeIndex = new 
CloseIndex.Builder("article").build(); 80 | JestResult result = jestClient.execute(closeIndex); 81 | System.out.println(result.getJsonString()); 82 | } 83 | 84 | /** 85 | * 优化索引 86 | * @throws Exception 87 | */ 88 | private static void optimize() throws Exception { 89 | JestClient jestClient = JestExample.getJestClient(); 90 | Optimize optimize = new Optimize.Builder().build(); 91 | JestResult result = jestClient.execute(optimize); 92 | System.out.println(result.getJsonString()); 93 | } 94 | 95 | /** 96 | * 刷新索引 97 | * @throws Exception 98 | */ 99 | private static void flush() throws Exception { 100 | JestClient jestClient = JestExample.getJestClient(); 101 | Flush flush = new Flush.Builder().build(); 102 | JestResult result = jestClient.execute(flush); 103 | System.out.println(result.getJsonString()); 104 | } 105 | 106 | /** 107 | * 判断索引目录是否存在 108 | * @throws Exception 109 | */ 110 | private static void indicesExists() throws Exception { 111 | JestClient jestClient = JestExample.getJestClient(); 112 | IndicesExists indicesExists = new IndicesExists.Builder("article").build(); 113 | JestResult result = jestClient.execute(indicesExists); 114 | System.out.println(result.getJsonString()); 115 | } 116 | 117 | /** 118 | * 查看节点信息 119 | * @throws Exception 120 | */ 121 | private static void nodesInfo() throws Exception { 122 | JestClient jestClient = JestExample.getJestClient(); 123 | NodesInfo nodesInfo = new NodesInfo.Builder().build(); 124 | JestResult result = jestClient.execute(nodesInfo); 125 | System.out.println(result.getJsonString()); 126 | } 127 | 128 | 129 | /** 130 | * 查看集群健康信息 131 | * @throws Exception 132 | */ 133 | private static void health() throws Exception { 134 | JestClient jestClient = JestExample.getJestClient(); 135 | Health health = new Health.Builder().build(); 136 | JestResult result = jestClient.execute(health); 137 | System.out.println(result.getJsonString()); 138 | } 139 | 140 | /** 141 | * 节点状态 142 | * @throws Exception 143 | */ 144 | private 
static void nodesStats() throws Exception { 145 | JestClient jestClient = JestExample.getJestClient(); 146 | NodesStats nodesStats = new NodesStats.Builder().build(); 147 | JestResult result = jestClient.execute(nodesStats); 148 | System.out.println(result.getJsonString()); 149 | } 150 | 151 | /** 152 | * 更新Document 153 | * @param index 154 | * @param type 155 | * @param id 156 | * @throws Exception 157 | */ 158 | private static void updateDocument(String index,String type,String id) throws Exception { 159 | JestClient jestClient = JestExample.getJestClient(); 160 | Article article = new Article(); 161 | article.setId(Integer.parseInt(id)); 162 | article.setTitle("中国3颗卫星拍到阅兵现场高清照"); 163 | article.setContent("据中国资源卫星应用中心报道,9月3日,纪念中国人民抗日战争暨世界反法西斯战争胜利70周年大阅兵在天安门广场举行。资源卫星中心针对此次盛事,综合调度在轨卫星,9月1日至3日连续三天持续观测首都北京天安门附近区域,共计安排5次高分辨率卫星成像。在阅兵当日,高分二号卫星、资源三号卫星及实践九号卫星实现三星联合、密集观测,捕捉到了阅兵现场精彩瞬间。为了保证卫星准确拍摄天安门及周边区域,提高数据处理效率,及时制作合格的光学产品,资源卫星中心运行服务人员从卫星观测计划制定、复核、优化到系统运行保障、光学产品图像制作,提前进行了周密部署,并拟定了应急预案,为圆满完成既定任务奠定了基础。"); 164 | article.setPubdate(new Date()); 165 | article.setAuthor("匿名"); 166 | article.setSource("新华网"); 167 | article.setUrl("http://news.163.com/15/0909/07/B32AGCDT00014JB5.html"); 168 | String script = "{" + 169 | " \"doc\" : {" + 170 | " \"title\" : \""+article.getTitle()+"\"," + 171 | " \"content\" : \""+article.getContent()+"\"," + 172 | " \"author\" : \""+article.getAuthor()+"\"," + 173 | " \"source\" : \""+article.getSource()+"\"," + 174 | " \"url\" : \""+article.getUrl()+"\"," + 175 | " \"pubdate\" : \""+new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(article.getPubdate())+"\"" + 176 | " }" + 177 | "}"; 178 | Update update = new Update.Builder(script).index(index).type(type).id(id).build(); 179 | JestResult result = jestClient.execute(update); 180 | System.out.println(result.getJsonString()); 181 | } 182 | 183 | 184 | /** 185 | * 删除Document 186 | * @param index 187 | * @param type 188 | * @param id 189 | * @throws Exception 190 | */ 191 | private static void 
deleteDocument(String index,String type,String id) throws Exception { 192 | JestClient jestClient = JestExample.getJestClient(); 193 | Delete delete = new Delete.Builder(id).index(index).type(type).build(); 194 | JestResult result = jestClient.execute(delete); 195 | System.out.println(result.getJsonString()); 196 | } 197 | 198 | /** 199 | * 获取Document 200 | * @param index 201 | * @param type 202 | * @param id 203 | * @throws Exception 204 | */ 205 | private static void getDocument(String index,String type,String id) throws Exception { 206 | JestClient jestClient = JestExample.getJestClient(); 207 | Get get = new Get.Builder(index, id).type(type).build(); 208 | JestResult result = jestClient.execute(get); 209 | Article article = result.getSourceAsObject(Article.class); 210 | System.out.println(article.getTitle()+","+article.getContent()); 211 | } 212 | 213 | /** 214 | * Suggestion 215 | * @throws Exception 216 | */ 217 | private static void suggest() throws Exception{ 218 | String suggestionName = "my-suggestion"; 219 | JestClient jestClient = JestExample.getJestClient(); 220 | Suggest suggest = new Suggest.Builder("{" + 221 | " \"" + suggestionName + "\" : {" + 222 | " \"text\" : \"the amsterdma meetpu\"," + 223 | " \"term\" : {" + 224 | " \"field\" : \"body\"" + 225 | " }" + 226 | " }" + 227 | "}").build(); 228 | SuggestResult suggestResult = jestClient.execute(suggest); 229 | System.out.println(suggestResult.isSucceeded()); 230 | List suggestionList = suggestResult.getSuggestions(suggestionName); 231 | System.out.println(suggestionList.size()); 232 | for(SuggestResult.Suggestion suggestion:suggestionList){ 233 | System.out.println(suggestion.text); 234 | } 235 | } 236 | 237 | /** 238 | * 查询全部 239 | * @throws Exception 240 | */ 241 | private static void searchAll() throws Exception { 242 | JestClient jestClient = JestExample.getJestClient(); 243 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); 244 | 
searchSourceBuilder.query(QueryBuilders.matchAllQuery()); 245 | Search search = new Search.Builder(searchSourceBuilder.toString()) 246 | .addIndex("article") 247 | .build(); 248 | SearchResult result = jestClient.execute(search); 249 | System.out.println("本次查询共查到:"+result.getTotal()+"篇文章!"); 250 | List> hits = result.getHits(Article.class); 251 | for (Hit hit : hits) { 252 | Article source = hit.source; 253 | System.out.println("标题:"+source.getTitle()); 254 | System.out.println("内容:"+source.getContent()); 255 | System.out.println("url:"+source.getUrl()); 256 | System.out.println("来源:"+source.getSource()); 257 | System.out.println("作者:"+source.getAuthor()); 258 | } 259 | } 260 | 261 | 262 | private static void createSearch(String queryString) throws Exception { 263 | JestClient jestClient = JestExample.getJestClient(); 264 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); 265 | searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString)); 266 | HighlightBuilder highlightBuilder = new HighlightBuilder(); 267 | highlightBuilder.field("title");//高亮title 268 | highlightBuilder.field("content");//高亮content 269 | highlightBuilder.preTags("").postTags("");//高亮标签 270 | highlightBuilder.fragmentSize(200);//高亮内容长度 271 | searchSourceBuilder.highlight(highlightBuilder); 272 | Search search = new Search.Builder(searchSourceBuilder.toString()) 273 | .addIndex("article") 274 | .build(); 275 | SearchResult result = jestClient.execute(search); 276 | System.out.println("本次查询共查到:"+result.getTotal()+"篇文章!"); 277 | List> hits = result.getHits(Article.class); 278 | for (Hit hit : hits) { 279 | Article source = hit.source; 280 | //获取高亮后的内容 281 | Map> highlight = hit.highlight; 282 | List titlelist = highlight.get("title");//高亮后的title 283 | if(titlelist!=null){ 284 | source.setTitle(titlelist.get(0)); 285 | } 286 | List contentlist = highlight.get("content");//高亮后的content 287 | if(contentlist!=null){ 288 | source.setContent(contentlist.get(0)); 289 | } 290 | 
System.out.println("标题:"+source.getTitle()); 291 | System.out.println("内容:"+source.getContent()); 292 | System.out.println("url:"+source.getUrl()); 293 | System.out.println("来源:"+source.getSource()); 294 | System.out.println("作者:"+source.getAuthor()); 295 | } 296 | } 297 | 298 | private static void bulkIndex() throws Exception { 299 | JestClient jestClient = JestExample.getJestClient(); 300 | 301 | Article article1 = new Article(4,"中国获租巴基斯坦瓜达尔港2000亩土地 为期43年","巴基斯坦(瓜达尔港)港务局表示,将把瓜达尔港2000亩土地,长期租赁给中方,用于建设(瓜达尔港)首个经济特区。分析指,瓜港首个经济特区的建立,不但能对巴基斯坦的经济发展模式,产生示范作用,还能进一步提振经济水平。" + 302 | "据了解,瓜达尔港务局于今年6月完成了1500亩土地的征收工作,另外500亩的征收工作也将很快完成,所征土地主要来自巴基斯坦海军和俾路支省政府紧邻规划的集装箱货堆区,该经济特区相关基础设施建设预计耗资3500万美元。瓜港务局表示,目前已将这2000亩地租赁给中国,中方将享有43年的租赁权。巴基斯坦前驻华大使马苏德·汗表示,这对提振巴基斯坦经济大有助益:“我认为巴基斯坦所能获得的最大益处主要还是在于经济互通领域”。" + 303 | "为了鼓励国内外投资者到经济特区投资,巴政府特别针对能源、税制等国内投资短板,专门为投资者出台了利好政策来鼓励投资。这些举措包括三个方面,一是保障能源,即保障经济特区的电力和天然气供应方面享有优先权;二是减免税收,即为经济特区投资的企业提供为期10年的税收假期,并为企业有关生产设备的进口给予免关税待遇;三是一站式服务,即为有意投资经济特区的投资者提供一站式快捷服务,包括向投资者提供有关优惠政策的详尽资讯。马苏德·汗还指出,为了让巴基斯坦从投资中受益,巴政府尽可能提供了各种优惠政策:“(由于)巴基斯坦想从中获益,为此做了大量力所能及的工作”。" + 304 | "巴政府高度重视瓜港经济特区建设,将其视为现代化港口的“标配”,期待其能够最大化利用瓜港深水良港的自然禀赋,吸引国内外投资者建立生产、组装以及加工企业。为鼓励投资,巴方还开出了20年免税的优惠条件。" + 305 | "除了瓜达尔港,中巴还有哪些项目?" 
+ 306 | "根据中国和巴基斯坦两国4月20日发表的联合声明,双方将积极推进喀喇昆仑公路升级改造二期(塔科特至哈维连段)、瓜达尔港东湾快速路、新国际机场、卡拉奇至拉合尔高速公路(木尔坦至苏库尔段)、拉合尔轨道交通橙线、海尔-鲁巴经济区、中巴跨境光缆、在巴实行地面数字电视传输标准等重点合作项目及一批基础设施和能源电力项目。" + 307 | "应巴基斯坦总统侯赛因和总理谢里夫邀请,中国国家主席习近平于4月20日至21日对巴基斯坦进行国事访问。访问期间,习近平主席会见了侯赛因总统、谢里夫总理以及巴基斯坦议会、军队和政党领导人,同巴各界人士进行了广泛接触。" + 308 | "双方高度评价将中巴经济走廊打造成丝绸之路经济带和21世纪海上丝绸之路倡议重大项目所取得的进展。巴方欢迎中方设立丝路基金并将该基金用于中巴经济走廊相关项目。" + 309 | "巴方将坚定支持并积极参与“一带一路”建设。丝路基金宣布入股三峡南亚公司,与长江三峡集团等机构联合开发巴基斯坦卡洛特水电站等清洁能源项目,这是丝路基金成立后的首个投资项目。丝路基金愿积极扩展中巴经济走廊框架下的其他项目投融资机会,为“一带一路”建设发挥助推作用。" + 310 | "双方认为,“一带一路”倡议是区域合作和南南合作的新模式,将为实现亚洲整体振兴和各国共同繁荣带来新机遇。" + 311 | "双方对中巴经济走廊建设取得的进展表示满意,强调走廊规划发展将覆盖巴全国各地区,造福巴全体人民,促进中巴两国及本地区各国共同发展繁荣。" + 312 | "双方同意,以中巴经济走廊为引领,以瓜达尔港、能源、交通基础设施和产业合作为重点,形成“1+4”经济合作布局。双方欢迎中巴经济走廊联委会第四次会议成功举行,同意尽快完成《中巴经济走廊远景规划》。", "http://news.163.com/15/0909/14/B332O90E0001124J.html", Calendar.getInstance().getTime(), "中国青年网", "匿名"); 313 | Article article2 = new Article(5,"中央党校举行秋季学期开学典礼 刘云山出席讲话","新华网北京9月7日电 中共中央党校7日上午举行2015年秋季学期开学典礼。中共中央政治局常委、中央党校校长刘云山出席并讲话,就深入学习贯彻习近平总书记系列重要讲话精神、坚持党校姓党提出要求。" + 314 | "刘云山指出,党校姓党是党校工作的根本原则。坚持党校姓党,重要的是坚持坚定正确的政治方向、贯彻实事求是的思想路线、落实从严治校的基本方针。要把党的基本理论教育和党性党风教育作为主课,深化中国特色社会主义理论体系学习教育,深化对习近平总书记系列重要讲话精神的学习教育,深化党章和党纪党规的学习教育。要坚持实事求是的思想方法和工作方法,弘扬理论联系实际的学风,提高教学和科研工作的针对性实效性。要严明制度、严肃纪律,把从严治校要求体现到党校工作和学员管理各方面,使党校成为不正之风的“净化器”。" + 315 | "刘云山指出,坚持党校姓党,既是对党校教职工的要求,也是对党校学员的要求。每一位学员都要强化党的意识,保持对党忠诚的政治品格,忠诚于党的信仰,坚定道路自信、理论自信、制度自信;忠诚于党的宗旨,牢记为了人民是天职、服务人民是本职,自觉践行党的群众路线;忠诚于党的事业,勤政敬业、先之劳之、敢于担当,保持干事创业的进取心和精气神。要强化党的纪律规矩意识,经常看一看党的政治准则、组织原则执行得怎么样,看一看党的路线方针政策落实得怎么样,看一看重大事项请示报告制度贯彻得怎么样,找差距、明不足,做政治上的明白人、遵规守纪的老实人。" + 316 | "刘云山强调,领导干部来党校学习,就要自觉接受党的优良作风的洗礼,修好作风建设这门大课。要重温党的光荣传统,学习革命先辈、英雄模范和优秀典型的先进事迹和崇高风范,自觉践行社会主义核心价值观,永葆共产党人的先进性纯洁性,以人格力量传递作风建设正能量。要认真落实党中央关于从严治党、改进作风的一系列要求,贯彻从严精神、突出问题导向,把自己摆进去、把职责摆进去,推动思想问题和实际问题一起解决,履行好党和人民赋予的职责使命。" + 317 | "赵乐际出席开学典礼。" + 318 | "中央有关部门负责同志,中央党校校委成员、全体学员和教职工在主会场参加开学典礼。中国浦东、井冈山、延安干部学院全体学员和教职工在分会场参加开学典礼。", "http://news.163.com/15/0907/20/B2UGF9860001124J.html", Calendar.getInstance().getTime(), "新华网", "NN053"); 319 | Article 
article3 = new Article(6,"俞正声率中央代表团赴大昭寺看望宗教界人士","国际在线报道:7号上午,赴西藏出席自治区50周年庆祝活动的中共中央政治局常委、全国政协主席俞正声与中央代表团主要成员赴大昭寺慰问宗教界爱国人士。俞正声向大昭寺赠送了习近平总书记题写的“加强民族团结,建设美丽西藏”贺幛,珐琅彩平安瓶,并向僧人发放布施,他在会见僧众时表示,希望藏传佛教坚持爱国爱教传统。" + 320 | "至今已有1300多年历史的大昭寺,在藏传佛教中拥有至高无上的地位。西藏的寺院归属于某一藏传佛教教派,大昭寺则是各教派共尊的神圣寺院。一年四季不论雨雪风霜,大昭寺外都有从四面八方赶来磕长头拜谒的虔诚信众。" + 321 | "7号上午,赴西藏出席自治区50周年庆祝活动的中共中央政治局常委、全国政协主席俞正声与中央代表团主要成员专门到大昭寺看望僧众,“我代表党中央、国务院和习主席向大家问好。藏传佛教有许多爱国爱教的传统,有许多高僧大德维护祖国统一和民族团结,坚持传播爱国爱教的正信。党和政府对此一贯给予充分肯定,并对藏传佛教的发展给予了支持。”" + 322 | "俞正声回忆这已是他自1995年以来第三次来大昭寺。他表示,过去二十年发生了巨大的变化,而藏传佛教的发展与祖国的发展、西藏的发展息息相关。藏传佛教要更好发展必须与社会主义社会相适应。他也向僧人们提出期望:“佛教既是信仰,也是一种文化和学问,希望大家不断提高自己对佛教的认识和理解,提高自己的水平。希望大家更好地管理好大昭寺,搞好民主管理、科学管理,使我们的管理更加规范。”" + 323 | "今天是中央代表团抵达拉萨的第二天,当天安排有多项与自治区成立五十周年相关活动。继上午接见自治区领导成员、离退休老同志、各族各界群众代表宗教界爱国人士,参观自治区50年成就展外,中央代表团下午还慰问了解放军驻拉萨部队、武警总队等。当天晚些时候,庆祝西藏自治区成立50周年招待会、文艺晚会将在拉萨举行。(来源:环球资讯)", "http://news.163.com/15/0907/16/B2U3O30R00014JB5.html", Calendar.getInstance().getTime(), "国际在线", "全宇虹"); 324 | Article article4 = new Article(7,"张德江:发挥人大主导作用 加快完备法律规范体系","新华网广州9月7日电 中共中央政治局常委、全国人大常委会委员长张德江9月6日至7日在广东出席第21次全国地方立法研讨会,并在佛山市就地方人大工作进行调研。他强调,要全面贯彻落实党的十八大和十八届三中、四中全会精神,深入学习贯彻习近平总书记系列重要讲话精神,认真实施立法法,充分发挥人大及其常委会在立法工作中的主导作用,抓住提高立法质量这个关键,加快形成完备的法律规范体系,为协调推进“四个全面”战略布局提供法治保障。" + 325 | "张德江指出,立法权是人大及其常委会的重要职权。做好新形势下立法工作,要坚持党的领导,贯彻党的路线方针政策和中央重大决策部署,切实增强思想自觉和行动自觉。要牢固树立依法立法、为民立法、科学立法理念,尊重改革发展客观规律和法治建设内在规律,加强重点领域立法,做到立法主动适应改革和经济社会发展需要。充分发挥在立法工作中的主导作用,把握立项、起草、审议等关键环节,科学确定立法项目,协调做好立法工作,研究解决立法中的重点难点问题,建立健全立法工作格局,形成立法工作合力。" + 326 | "张德江说,地方性法规是我国法律体系的重要组成部分。地方立法关键是在本地特色上下功夫、在有效管用上做文章。当前要落实好立法法的规定,扎实推进赋予设区的市地方立法权工作,明确步骤和时间,做好各项准备工作,标准不能降低,底线不能突破,坚持“成熟一个、确定一个”,确保立法质量。" + 327 | "张德江强调,加强和改进立法工作,要有高素质的立法工作队伍作为保障。要把思想政治建设摆在首位,全面提升专业素质能力,充实力量,培养人才,努力造就一支忠于党、忠于国家、忠于人民、忠于法律的立法工作队伍。" + 328 | 
"张德江一直非常关注地方人大特别是基层人大工作。在粤期间,他来到佛山市人大常委会,详细询问立法、监督等工作情况,希望他们与时俱进、开拓创新,切实担负起宪法法律赋予的职责。他走进南海区人大常委会、顺德区乐从镇人大主席团,同基层人大代表和人大工作者亲切交谈,肯定基层人大代表联系群众的有益做法,强调人大代表不能脱离人民群众,必须把人民利益放在心中,时刻为群众着想,听取群众意见,反映群众意愿,帮助群众解决实际问题。张德江指出,县乡人大在基层治理体系和治理能力建设中具有重要作用。要贯彻落实中央关于加强县乡人大工作和建设的精神,认真实施新修改的地方组织法、选举法、代表法,不断提高基层人大工作水平,推动人大工作迈上新台阶。" + 329 | "中共中央政治局委员、广东省委书记胡春华参加上述活动。", "http://news.163.com/15/0907/20/B2UGEUTJ00014PRF.html", Calendar.getInstance().getTime(), "新华网", "陈菲"); 330 | 331 | Bulk bulk = new Bulk.Builder() 332 | .defaultIndex("article") 333 | .defaultType("article") 334 | .addAction(Arrays.asList( 335 | new Index.Builder(article1).build(), 336 | new Index.Builder(article2).build(), 337 | new Index.Builder(article3).build(), 338 | new Index.Builder(article4).build() 339 | )).build(); 340 | jestClient.execute(bulk); 341 | } 342 | 343 | 344 | private static void createIndex() throws Exception { 345 | JestClient jestClient = JestExample.getJestClient(); 346 | Article article1 = new Article(1,"高圆圆身上淤青 遭家暴还是玩SM遭性虐?","近日,有媒体拍到高圆圆身上的淤青,腿上还有两块伤疤,引起不少人猜测是遭受家暴。" + 347 | "对于遭到家暴的传闻,高圆圆首次作出澄清,称这是因为照顾母亲而留下的伤痕,她跟赵又廷关系好得很。" + 348 | "照顾母亲竟然会留下伤痕?究竟是怎么照顾的。" + 349 | "高圆圆称,“我妈当时住院,她翻身是需要旁人帮助的,我要到床头去,抱起她的上臂,然后她的脚一蹬,这样才能翻过来。" + 350 | "但我们两个的力气都不够,每次一用力的时候,我的大腿就会刚好撞在那个床框上,所以大腿上就撞出那两块淤青了。" + 351 | "事情真的这么简单吗?即使稍微一撞,也不至于淤青吧!" 
+ 352 | "看到那个伤疤以及淤青的皮肤,不得不让人怀疑高圆圆是遭受家暴。" + 353 | "当然,还有另外一个原因,就是玩SM遭性虐。" + 354 | "当然,这么变态的事情,相信女神不会做的。" + 355 | "是照顾母亲留下的伤痕也好,遭受家暴也好,希望女神高圆圆以后都能平平安安健健康康吧!", "http://www.vdfly.com/star/20141119/37968.html", Calendar.getInstance().getTime(), "青春娱乐网", "匿名"); 356 | Article article2 = new Article(2,"习近平:希望新西兰保障输华产品质量安全","新华网惠灵顿11月20日电(记者陈贽 田帆刘华)国家主席习近平20日在惠灵顿同新西兰总理约翰·基举行会谈,双方决定,将中新关系提升为全面战略伙伴关系,共建中新两国利益共同体,推动两国合作不断迈上新台阶。" + 357 | "习近平指出,中新两国相互理解、相互包容、平等相待,建立了高水平政治互信,开展了宽领域互利合作,两国人民都支持发展中新关系。" + 358 | "两国决定建立全面战略伙伴关系,为中新关系发展指明了大方向。" + 359 | "习近平强调,两国要保持高层交往,构筑多层次多渠道交流合作格局。" + 360 | "中新自由贸易协定是中国同发达国家之间达成的第一个自由贸易协定。双方要采取积极举措,推动两国经贸关系不断迈上新台阶,力争早日实现2020年双边贸易额达到300亿新元的新目标。" + 361 | "双方要继续巩固和提升农牧业等传统领域合作。中国有13亿多人口,市场大得很。新西兰乳制品、羊毛、牛羊肉、海产品等优质产品在中国很受欢迎。" + 362 | "希望新方切实保障输华产品质量安全,保障中国消费者权益。金融服务、信息技术、节能环保、生物医药等是中国重点发展的产业,新西兰有竞争力,双方可以开展更多合作。" + 363 | "习近平强调,前不久,中国同其他各方一道,推动亚太经合组织第二十二次领导人非正式会议启动了亚太自由贸易区进程。中新都是亚太经合组织成员,也都是区域全面经济伙伴关系协定谈判方,双方可以在这些机制中加强协调合作,通过打造惠及各方的地区自由贸易安排、建设好亚洲基础设施投资银行,推进亚太经济一体化进程。" + 364 | "南太平洋地区也是中方提出的21世纪海上丝绸之路的自然延伸,我们欢迎新方参与进来,使中新经贸合作取得更大发展。习近平强调,中新两国要加强人文交流,增进相互了解和友谊,中方将在新西兰设立中国文化中心。" + 365 | "中新签署电视合拍协议是中国政府同外国政府签署的首个电视合拍协议。双方要加强防务、执法交流,在打击腐败、追逃追赃等方面开展合作。" + 366 | "中方愿意同新方在南极、太平洋岛国事务上加强合作。约翰·基表示,建交42年来,新中关系取得长足进展,合作不断深化和扩大,彼此成为好伙伴。" + 367 | "新方致力于同中方一道建设好两国全面战略伙伴关系,在中方核心利益和重大关切问题上,新方将继续支持中方。新中签署自由贸易协定6年来,两国合作取得丰硕成果,新形势下,要提高双边贸易投资水平。" + 368 | "新方希望扩大新西兰农产品、乳制品对华出口,欢迎中国企业前来投资。新方重视亚洲基础设施投资银行的作用,将积极参与银行建设。新方将简化签证手续,欢迎更多中国公民前来旅游、留学。" + 369 | "中方积极节能减排,为国际社会合作应对气候变化树立了典范,新方希望同中方加强合作。我祝贺中方成功举办了亚太经合组织领导人非正式会议,愿意同中方一道,推动区域一体化进程,促进亚太地区和平与繁荣。" + 370 | "会谈后,双方发表《中国和新西兰关于建立全面战略伙伴关系的联合声明》。习近平和约翰·基共同见证了多项双边合作文件的签署,涉及气候变化、电视、教育、南极、金融、旅游、食品安全等领域。两国领导人还共同会见记者。" + 371 | "习近平强调,中新关系具有开创性、示范性意义。中新建立了全面战略伙伴关系,为两国关系规划了宏伟蓝图。中新两国签署一系列合作协议,充分展示了两国务实合作的广度和深度。" + 372 | "中国人说:“兄弟同心,其利断金。”毛利族谚语说:“你我篮子在一起,大家生活更美好。”让我们携手合作,谱写中新关系发展新篇章,更好造福两国人民。约翰·基表示,新西兰钦佩中国的非凡成就,相信中国的发展不仅造福中国人民,也给包括新西兰在内的各国带来巨大机遇。" + 373 | "我们愿同中方共同推动两国全面战略伙伴关系发展。王沪宁、栗战书、杨洁篪等参加上述活动。", 
"http://news.163.com/14/1120/15/ABGM1POL00014JB5.html", Calendar.getInstance().getTime(), "新华网", "NN053"); 374 | Article article3 = new Article(3,"周末陕西将迎雨雪天气 关中近日大部雾霾缭绕","本报讯(首席记者 姬娜)前天,我省47个县城出现雾霾。昨天,关中大部分地区依旧雾霾缭绕,能见度差。预计22日-24日有雨雪天气,届时雾霾或将能得到缓解。对于这样的天气,微信上不再如之前有那么多埋怨牢骚,更多的是无奈。" + 375 | "刘先生说:“现在有雾霾是正常的,天气晴好反倒是不正常的,我已经习惯了。”网友“舟子”调侃称:“与其埋怨世界不如改变自己,多多欣赏一下这霾天气,那是多么美妙的一种朦胧感,让我想起了初恋。”" + 376 | "省气象台专家称,昨天,最低气温略有回升,陕北大部在-4-4℃,关中大部在0-8℃,陕南大部4-9℃,全省最低温出现在吴起, -3.8℃。有了雾霾和云层的包 围,西安温度在6-17℃之间。" + 377 | "预计本月下旬全省以阴天、多云为主,22-24日有降水天气。今天:陕北、关中多云间晴天,陕南多云转阴天;21日:全省多云;22日:陕北多云间阴天,关中、陕南多云转阴天,部分地方有小雨;23日:全省阴天,关中部分、陕南部分地方有小雨;24日:全省阴天,陕北部分地方有雨夹雪,关中、陕南有小雨。西安今天晴转多云,3-15℃;明天多云,后天阴天。据省气象局消息:昨天我省晴间多云,无明显冷空气活动,早晨大部分地方出现轻雾,11时后泾河、周至、长安、户县、渭南、蒲城、韩城、华县、咸阳、彬县、武功等地出现霾。" + 378 | "受雾霾影响,关中地区除宝鸡外,西安、咸阳、渭南、铜川空气质量为中度污染。今晨0℃气温线主要位于延安南部,07时气温具体分布:陕北大部-4~2℃(最低吴起-4.1℃),关中大部、商洛2~6℃(西安泾河5.8℃),汉中、安康5~10℃。渭南市气象台昨天08时发布霾黄色预警信号:预计未来24小时内渭南大部地区将出现中度霾,局地有重度霾,易形成中度到重度空气污染。今明两天大部地方多云 22-24日有降水过程今天白天:陕北、关中多云间晴天,陕南多云间阴天。早晨关中、陕南部分地方有雾或霾。今天晚上:陕北、关中多云间晴天,陕南多云间阴天。" + 379 | "21日:陕北、关中多云,陕南多云转阴天,南部局地有小雨。22日:陕北、关中多云转阴天,陕南阴天,关中南部和陕南大部有小雨。23日:全省阴天,关中部分有小雨,陕南有小到中雨。24日:全省阴天,陕北部分地方有雨夹雪,关中、陕南有小雨。25日:全省阴天转多云。26日:全省多云。后期天气趋势(27-29日):我省以阴到多云为主,中南部仍有阴雨天气。", "http://xian.qq.com/a/20141120/011300.htm", Calendar.getInstance().getTime(), "三秦都市报 - 三秦网", "姬娜"); 380 | Index index1 = new Index.Builder(article1).index("article").type("article").build(); 381 | Index index2 = new Index.Builder(article2).index("article").type("article").build(); 382 | Index index3 = new Index.Builder(article3).index("article").type("article").build(); 383 | JestResult jestResult1 = jestClient.execute(index1); 384 | System.out.println(jestResult1.getJsonString()); 385 | JestResult jestResult2 = jestClient.execute(index2); 386 | System.out.println(jestResult2.getJsonString()); 387 | JestResult jestResult3 = jestClient.execute(index3); 388 | System.out.println(jestResult3.getJsonString()); 389 | } 390 | 391 | 392 | private static JestClient getJestClient() { 393 | 
JestClientFactory factory = new JestClientFactory(); 394 | factory.setHttpClientConfig(new HttpClientConfig 395 | .Builder("http://127.0.0.1:9200") 396 | .gson(new GsonBuilder().setDateFormat("yyyy-MM-dd'T'HH:mm:ss").create()) 397 | .multiThreaded(true) 398 | .readTimeout(10000) 399 | .build()); 400 | JestClient client = factory.getObject(); 401 | return client; 402 | } 403 | 404 | } -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/controller/ArticleController.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.controller; 2 | 3 | import net.aimeizi.model.Article; 4 | import net.aimeizi.service.ArticleService; 5 | import org.apache.commons.lang3.StringUtils; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.stereotype.Controller; 8 | import org.springframework.web.bind.annotation.RequestMapping; 9 | import org.springframework.web.bind.annotation.RequestMethod; 10 | import org.springframework.web.servlet.ModelAndView; 11 | 12 | import javax.servlet.http.HttpServletRequest; 13 | import javax.servlet.http.HttpServletResponse; 14 | import java.util.List; 15 | import java.util.Map; 16 | 17 | @Controller 18 | public class ArticleController { 19 | 20 | @Autowired 21 | // @Resource 22 | private ArticleService articleService; 23 | 24 | @RequestMapping(value = "/", method = RequestMethod.GET) 25 | public String tosearch() { 26 | return "search"; 27 | } 28 | 29 | @RequestMapping(value = "/search", method = RequestMethod.POST) 30 | public ModelAndView search(HttpServletRequest request, HttpServletResponse response) { 31 | ModelAndView modelAndView = new ModelAndView(); 32 | modelAndView.setViewName("search"); 33 | String field = request.getParameter("field"); 34 | String queryString = request.getParameter("queryString"); 35 | String older = request.getParameter("older"); 36 | String pageNumber = 
request.getParameter("pageNumber"); 37 | String pageSize = request.getParameter("pageSize"); 38 | if (StringUtils.isEmpty(queryString)) { 39 | return modelAndView; 40 | } 41 | try { 42 | if (StringUtils.isEmpty(pageNumber) || StringUtils.isEmpty(pageSize)) { 43 | pageNumber = String.valueOf("1"); 44 | pageSize = String.valueOf("10"); 45 | } 46 | Map maps = articleService.search(field, queryString, older, Integer.parseInt(pageNumber), Integer.parseInt(pageSize)); 47 | modelAndView.addObject("queryString", queryString); 48 | modelAndView.addObject("articles", (List
) maps.get("articles")); 49 | long count = (Long) maps.get("count"); 50 | modelAndView.addObject("count", count); 51 | modelAndView.addObject("took", (Long) maps.get("took")); 52 | modelAndView.addObject("field", field); 53 | modelAndView.addObject("older", older); 54 | modelAndView.addObject("pageNumber", pageNumber); 55 | modelAndView.addObject("pageSize", pageSize); 56 | modelAndView.addObject("totalPages", count % Integer.parseInt(pageSize) == 0 ? count / Integer.parseInt(pageSize) : count / Integer.parseInt(pageSize) + 1); 57 | } catch (Exception e) { 58 | e.printStackTrace(); 59 | } 60 | return modelAndView; 61 | } 62 | 63 | } -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/dao/ArticleDao.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.dao; 2 | 3 | import net.aimeizi.model.Article; 4 | 5 | /** 6 | * Created by Administrator on 2015/9/10. 7 | */ 8 | public interface ArticleDao { 9 | void save(Article article); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/dao/impl/ArticleDaoImpl.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.dao.impl; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.jdbc.core.JdbcTemplate; 7 | import org.springframework.jdbc.core.PreparedStatementSetter; 8 | import org.springframework.stereotype.Repository; 9 | 10 | import javax.sql.DataSource; 11 | import java.sql.Date; 12 | import java.sql.PreparedStatement; 13 | import java.sql.SQLException; 14 | 15 | /** 16 | * Created by Administrator on 2015/9/10. 
17 | */ 18 | @Repository 19 | public class ArticleDaoImpl implements ArticleDao { 20 | 21 | private JdbcTemplate jdbcTemplate; 22 | 23 | @Autowired 24 | public void setDataSource(DataSource dataSource) { 25 | this.jdbcTemplate = new JdbcTemplate(dataSource); 26 | } 27 | 28 | public void save(final Article article) { 29 | String sql = "INSERT INTO news (title,content,url,source,author,pubdate) VALUES (?,?,?,?,?,?)"; 30 | jdbcTemplate.update(sql, new PreparedStatementSetter() { 31 | public void setValues(PreparedStatement ps) throws SQLException { 32 | ps.setString(1,article.getTitle()); 33 | ps.setString(2,article.getContent()); 34 | ps.setString(3,article.getUrl()); 35 | ps.setString(4,article.getSource()); 36 | ps.setString(5,article.getAuthor()); 37 | ps.setDate(6, new Date(article.getPubdate().getTime())); 38 | } 39 | }); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/model/Article.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.model; 2 | 3 | import io.searchbox.annotations.JestId; 4 | 5 | import java.util.Date; 6 | 7 | public class Article { 8 | 9 | @JestId 10 | private int id; 11 | private String title; 12 | private String content; 13 | private String url; 14 | private Date pubdate; 15 | private String source; 16 | private String author; 17 | 18 | public Article() { 19 | } 20 | 21 | public Article(int id, String title, String content, String url, 22 | Date pubdate, String source, String author) { 23 | super(); 24 | this.id = id; 25 | this.title = title; 26 | this.content = content; 27 | this.url = url; 28 | this.pubdate = pubdate; 29 | this.source = source; 30 | this.author = author; 31 | } 32 | 33 | public int getId() { 34 | return id; 35 | } 36 | 37 | public void setId(int id) { 38 | this.id = id; 39 | } 40 | 41 | public String getTitle() { 42 | return title; 43 | } 44 | 45 | public void setTitle(String title) { 46 | 
this.title = title; 47 | } 48 | 49 | public String getContent() { 50 | return content; 51 | } 52 | 53 | public void setContent(String content) { 54 | this.content = content; 55 | } 56 | 57 | public String getUrl() { 58 | return url; 59 | } 60 | 61 | public void setUrl(String url) { 62 | this.url = url; 63 | } 64 | 65 | public Date getPubdate() { 66 | return pubdate; 67 | } 68 | 69 | public void setPubdate(Date pubdate) { 70 | this.pubdate = pubdate; 71 | } 72 | 73 | public String getSource() { 74 | return source; 75 | } 76 | 77 | public void setSource(String source) { 78 | this.source = source; 79 | } 80 | 81 | public String getAuthor() { 82 | return author; 83 | } 84 | 85 | public void setAuthor(String author) { 86 | this.author = author; 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/service/ArticleService.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.service; 2 | 3 | import net.aimeizi.model.Article; 4 | 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | /** 9 | * Created by Administrator on 2015/9/21. 
public interface ArticleService {

    /**
     * Full-text search by field and query string.
     *
     * @param field       name of the field to search in; the implementation in
     *                    this project treats the value "all" as a query-string
     *                    search and any other value as a term query on that field
     * @param queryString the text to search for
     * @param older       requested sort order
     *                    NOTE(review): appears unused by the implementation in
     *                    this project, whose sorting line is commented out - confirm
     * @param pageNumber  1-based page number (the implementation computes the
     *                    offset as {@code (pageNumber - 1) * pageSize})
     * @param pageSize    maximum number of results per page
     * @return a map of results; the controller in this project reads the keys
     *         "articles", "count" and "took" from it
     * @throws Exception if the search cannot be executed
     */
    Map search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception;

}
27 | */
/**
 * {@link ArticleService} backed by the Jest HTTP client. Searches the "news" index of the
 * Elasticsearch node at http://127.0.0.1:9200 and maps the hits to {@link Article} objects,
 * overwriting title/content/author/source with their highlighted fragments when present.
 */
@Service
public class ArticleServiceImpl implements ArticleService {

    /** Date format used both for Gson (de)serialisation and for manual parsing of "pubdate". */
    private static final String DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";

    /**
     * Holds the one shared client. The client is configured as multi-threaded, so it is built
     * once on first use instead of once per search (the per-search clients were never released).
     */
    private static final class ClientHolder {
        private static final JestClient INSTANCE = createJestClient();
    }

    /** Builds a Jest client for the local node with a 10 s read timeout. */
    private static JestClient createJestClient() {
        JestClientFactory factory = new JestClientFactory();
        factory.setHttpClientConfig(new HttpClientConfig
                .Builder("http://127.0.0.1:9200")
                .gson(new GsonBuilder().setDateFormat(DATE_PATTERN).create())
                .multiThreaded(true)
                .readTimeout(10000)
                .build());
        return factory.getObject();
    }

    /** Returns the shared Jest client. */
    private static JestClient getJestClient() {
        return ClientHolder.INSTANCE;
    }

    /**
     * Runs one page of a highlighted search against the "news" index.
     *
     * @param field       "all" for a query-string search, otherwise the field for a term query
     * @param queryString text to search for
     * @param older       requested sort direction; currently unused because sorting is disabled below
     * @param pageNumber  1-based page number; values below 1 are treated as 1
     * @param pageSize    number of hits per page
     * @return map with "articles" (List of Article), "count" (total hits) and "took"
     * @throws IllegalStateException if Elasticsearch reports the search as failed
     * @throws Exception             if the request cannot be executed
     */
    public Map<String, Object> search(String field, String queryString, String older, int pageNumber, int pageSize) throws Exception {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Build the query.
        if ("all".equals(field)) {
            searchSourceBuilder.query(QueryBuilders.queryStringQuery(queryString));
        } else {
            searchSourceBuilder.query(QueryBuilders.termQuery(field, queryString));
            // Sorting is intentionally left disabled, as in the original:
            // searchSourceBuilder.sort(field, "asc".equals(older) ? SortOrder.ASC : SortOrder.DESC);
        }

        // Highlight the four text fields.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        highlightBuilder.field("content");
        highlightBuilder.field("author");
        highlightBuilder.field("source");
        // NOTE(review): both tag strings are empty in this dump; they look like HTML tags that
        // were stripped during extraction - confirm against the repository before relying on them.
        highlightBuilder.preTags("").postTags("");
        highlightBuilder.fragmentSize(200); // length of each highlighted fragment
        searchSourceBuilder.highlight(highlightBuilder);

        // Paging. A page number below 1 would yield a negative offset, so clamp it.
        int page = Math.max(pageNumber, 1);
        searchSourceBuilder.from((page - 1) * pageSize);
        searchSourceBuilder.size(pageSize);

        Search search = new Search.Builder(searchSourceBuilder.toString())
                .addIndex("news") // index name
                .build();
        SearchResult result = getJestClient().execute(search);
        if (!result.isSucceeded()) {
            // Previously a failed search surfaced as a NullPointerException on the missing "took"/"hits".
            throw new IllegalStateException("Search on index 'news' failed: " + result.getErrorMessage());
        }

        JsonObject jsonObject = result.getJsonObject();
        JsonObject hitsobject = jsonObject.getAsJsonObject("hits");
        long took = jsonObject.get("took").getAsLong();
        long total = hitsobject.get("total").getAsLong();

        // Let Jest deserialise each hit, then overlay the highlighted fragments.
        List<Article> articles = new ArrayList<Article>();
        List<SearchResult.Hit<Article, Void>> hits = result.getHits(Article.class);
        for (SearchResult.Hit<Article, Void> hit : hits) {
            Article article = hit.source;
            if (article == null) {
                continue;
            }
            applyHighlights(article, hit.highlight);
            articles.add(article);
        }

        Map<String, Object> maps = new HashMap<String, Object>();
        maps.put("articles", articles);
        maps.put("count", total);
        maps.put("took", took);
        return maps;
    }

    /** Replaces title/content/author/source with their first highlighted fragment, if any. */
    private static void applyHighlights(Article article, Map<String, List<String>> highlight) {
        if (highlight == null) {
            return; // hit carries no highlight section
        }
        String title = firstFragment(highlight.get("title"));
        if (title != null) {
            article.setTitle(title);
        }
        String content = firstFragment(highlight.get("content"));
        if (content != null) {
            article.setContent(content);
        }
        String author = firstFragment(highlight.get("author"));
        if (author != null) {
            article.setAuthor(author);
        }
        String source = firstFragment(highlight.get("source"));
        if (source != null) {
            article.setSource(source);
        }
    }

    /** Returns the first element of {@code fragments}, or null when the list is null or empty. */
    private static String firstFragment(List<String> fragments) {
        return (fragments == null || fragments.isEmpty()) ? null : fragments.get(0);
    }

    /**
     * Manual alternative to {@code result.getHits(Article.class)}: walks the raw JSON hits and
     * appends one Article per hit to {@code articles}. Not called at present.
     *
     * @param articles list that receives the parsed articles
     * @param result   search result to parse
     * @throws ParseException if a "pubdate" value does not match {@link #DATE_PATTERN}
     */
    private void parseSearchResult(List<Article> articles, SearchResult result) throws ParseException {
        JsonObject hitsobject = result.getJsonObject().getAsJsonObject("hits");
        JsonArray jsonArray = hitsobject.getAsJsonArray("hits");
        // SimpleDateFormat is not thread-safe, so keep it local; one instance serves the whole loop.
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_PATTERN);

        for (int i = 0; i < jsonArray.size(); i++) {
            JsonObject jsonHitsObject = jsonArray.get(i).getAsJsonObject();

            // Stored document fields.
            JsonObject sourceObject = jsonHitsObject.get("_source").getAsJsonObject();

            Article article = new Article();
            article.setTitle(sourceObject.get("title").getAsString());
            article.setContent(sourceObject.get("content").getAsString());
            article.setSource(sourceObject.get("source").getAsString());
            article.setAuthor(sourceObject.get("author").getAsString());
            article.setPubdate(sdf.parse(sourceObject.get("pubdate").getAsString()));
            article.setUrl(sourceObject.get("url").getAsString());

            // Highlighted fragments; the member is absent when nothing in the hit was highlighted.
            JsonObject highlightObject = jsonHitsObject.getAsJsonObject("highlight");
            String content = firstHighlight(highlightObject, "content");
            String title = firstHighlight(highlightObject, "title");
            String source = firstHighlight(highlightObject, "source");
            String author = firstHighlight(highlightObject, "author");
            if (content != null) {
                article.setContent(content);
            }
            if (title != null) {
                article.setTitle(title);
            }
            if (source != null) {
                article.setSource(source);
            }
            if (author != null) {
                article.setAuthor(author);
            }
            articles.add(article);
        }
    }

    /** Returns the first highlighted fragment of {@code field}, or null if there is none. */
    private static String firstHighlight(JsonObject highlightObject, String field) {
        if (highlightObject == null || !highlightObject.has(field)) {
            return null;
        }
        JsonArray fragments = highlightObject.getAsJsonArray(field);
        return fragments.size() > 0 ? fragments.get(0).getAsString() : null;
    }
}
--------------------------------------------------------------------------------
/src/main/java/net/aimeizi/webmagic/GovNewsPageProcesser.java:
--------------------------------------------------------------------------------
package net.aimeizi.webmagic;

import net.aimeizi.model.Article;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Crawls news from the central government portal (gov.cn): top news, hot topics, ministry
 * news, local reports, law enforcement and regulation, and similar columns.
 * Created by Administrator on 2015/9/10.
 */
public class GovNewsPageProcesser implements PageProcessor {

    // News list (column) pages
    private final static String URL_LIST = "http://new\\.sousuo\\.gov\\.cn\\/column\\/\\d+\\/\\d+\\.htm";

    // Article pages linked from the list pages
    private final static String URL_POST = "http://www\\.gov\\.cn/[\\w/-]+\\.htm";

    private Site site = Site.me()
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36");

    /**
     * List pages only enqueue further article and list links. Every other page is treated as
     * an article: its fields are extracted, stored in the page's result items, and the
     * resulting Article is indexed through {@link Utils#index(Article)}.
     */
    public void process(Page page) {
        // List page
        if (page.getUrl().regex(URL_LIST).match()) {
            // Enqueue article links
            page.addTargetRequests(page.getHtml().links().regex(URL_POST).all());
            // Enqueue further list pages
            page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
            return;
        }

        // Article page
        String title = extract(page, "//div[@class='pages-title']", null);
        String content = extract(page, "//div[@class='article-colum']/div[@class='pages_content']/table[@id='printContent']/tbody/tr/td", null);
        String source = extract(page, "//div[@class='article-colum']/div[@class='pages-date']/span[@class='font'][2]", "来源: ");
        String author = extract(page, "//div[@class='article-colum']/div[@class='pages_content']/div[@class='editor']", "责任编辑: ");
        String create = extract(page, "//div[@class='article-colum']/div[@class='pages-date']", null);
        String url = page.getUrl().get();

        page.putField("title", title);
        page.putField("content", content);
        page.putField("source", source);
        page.putField("author", author);
        page.putField("create", create);
        page.putField("url", url);

        // Build the article and index it
        Article article = Utils.createArticle(title, content, source, author, url, create);
        Utils.index(article);
    }

    /**
     * Evaluates {@code xpath} on the page, removes {@code label} (when given) from the raw
     * markup and strips the HTML. Returns "" when the XPath yields no value; previously
     * the "source"/"author" extraction called {@code replace} on that value without a null check.
     */
    private static String extract(Page page, String xpath, String label) {
        String raw = page.getHtml().xpath(xpath).toString();
        if (raw == null) {
            return "";
        }
        if (label != null) {
            raw = raw.replace(label, "");
        }
        return Utils.replaceHTML(raw);
    }

    public Site getSite() {
        return site;
    }

    // The private replaceHTML copy that used to live here was unused (process() calls
    // Utils.replaceHTML) and has been dropped.

    /** Starts a 5-thread crawl over the seed columns and stores results through the "jdbcPipeline" bean. */
    public static void main(String[] args) {

        ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml");
        JdbcPipeline jdbcPipeline = (JdbcPipeline) applicationContext.getBean("jdbcPipeline");
        Spider.create(new GovNewsPageProcesser())
                .addUrl("http://new.sousuo.gov.cn/column/19769/0.htm") // top news
                .addUrl("http://new.sousuo.gov.cn/column/16704/0.htm") // hot topics
                .addUrl("http://new.sousuo.gov.cn/column/16700/0.htm") // ministry news
                .addUrl("http://new.sousuo.gov.cn/column/16699/0.htm") // local reports
                .addUrl("http://new.sousuo.gov.cn/column/16697/0.htm") // law enforcement and regulation
                .addUrl("http://new.sousuo.gov.cn/column/19423/0.htm") // State Council information
                .addUrl("http://new.sousuo.gov.cn/column/16622/0.htm") // speeches
                .addUrl("http://new.sousuo.gov.cn/column/16623/0.htm") // meetings
                .addUrl("http://new.sousuo.gov.cn/column/16621/0.htm") // activities
                .addUrl("http://new.sousuo.gov.cn/column/16620/0.htm") // overseas visits
                .addUrl("http://new.sousuo.gov.cn/column/16740/0.htm") // special topics - latest
                .addUrl("http://new.sousuo.gov.cn/column/16739/0.htm") // special topics - focus
                .addUrl("http://new.sousuo.gov.cn/column/16743/0.htm") // incidents
                .addUrl("http://new.sousuo.gov.cn/column/16744/0.htm") // contingency plans
                .addUrl("http://new.sousuo.gov.cn/column/16742/0.htm") // work
                .addUrl("http://new.sousuo.gov.cn/column/16765/0.htm") // policy interpretation - experts
                .addUrl("http://new.sousuo.gov.cn/column/16764/0.htm") // policy interpretation - media
                .addUrl("http://new.sousuo.gov.cn/column/17999/0.htm") // commentary - key essays
                .addUrl("http://new.sousuo.gov.cn/column/18000/0.htm") // commentary - current affairs
                .addUrl("http://new.sousuo.gov.cn/column/18001/0.htm") // commentary - online
                .addUrl("http://new.sousuo.gov.cn/column/16852/0.htm") // data news
                .addPipeline(jdbcPipeline) // persist crawled items to the database
                .thread(5)
                .run();
    }
}
104 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/JdbcPipeline.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.dao.ArticleDao; 4 | import net.aimeizi.model.Article; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Component; 7 | import us.codecraft.webmagic.ResultItems; 8 | import us.codecraft.webmagic.Task; 9 | import us.codecraft.webmagic.pipeline.Pipeline; 10 | 11 | import
java.text.ParseException; 12 | import java.text.SimpleDateFormat; 13 | import java.util.Date; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 |
/**
 * WebMagic pipeline that converts each crawled result into an {@link Article} and stores it
 * through {@link ArticleDao}.
 * Created by Administrator on 2015/9/10.
 */
@Component("jdbcPipeline")
public class JdbcPipeline implements Pipeline {

    /** Finds a "yyyy-MM-dd HH:mm" timestamp inside the free-text "create" field. */
    private static final Pattern TIMESTAMP = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

    @Autowired
    ArticleDao articleDao;

    /**
     * Saves one crawled item. Nothing is saved when {@code resultItems} is null or empty.
     * The publication date is left unset when the "create" field is missing or unparseable.
     *
     * @param resultItems fields produced by the page processor ("title", "content", "source",
     *                    "author", "url", "create")
     * @param task        the running crawl task (unused)
     */
    public void process(ResultItems resultItems, Task task) {
        // The null check must come before the first dereference; it used to come after getAll().
        if (resultItems == null) {
            return;
        }
        Map<String, Object> items = resultItems.getAll();
        if (items == null || items.isEmpty()) {
            return;
        }

        Article article = new Article();
        article.setTitle((String) items.get("title"));
        article.setContent((String) items.get("content"));
        article.setSource((String) items.get("source"));
        article.setAuthor((String) items.get("author"));
        article.setUrl((String) items.get("url"));

        Date pubdate = parsePubdate((String) items.get("create"));
        if (pubdate != null) {
            article.setPubdate(pubdate);
        }
        articleDao.save(article);
    }

    /**
     * Extracts the publication date from {@code text}. The first "yyyy-MM-dd HH:mm" match is
     * parsed; if there is no match the whole text is tried, as before.
     *
     * @return the parsed date, or null when {@code text} is null or cannot be parsed
     */
    private static Date parsePubdate(String text) {
        if (text == null) {
            return null; // previously a NullPointerException from Pattern.matcher(null)
        }
        Matcher matcher = TIMESTAMP.matcher(text);
        String candidate = matcher.find() ? matcher.group(0) : text;
        try {
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
            return new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(candidate);
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }
}
51 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/NeteaseNewsPageProcesser.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import net.aimeizi.model.Article; 4 | import org.springframework.context.ApplicationContext; 5 | import org.springframework.context.support.ClassPathXmlApplicationContext; 6 | import us.codecraft.webmagic.Page; 7 | import us.codecraft.webmagic.Site; 8 | import us.codecraft.webmagic.Spider; 9 | import us.codecraft.webmagic.processor.PageProcessor; 10 | 11 | /** 12 | * Created by Administrator on
2015/9/9 0009. 13 | */
public class NeteaseNewsPageProcesser implements PageProcessor {

    private Site site = Site.me().setDomain("news.163.com")
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36");

    public static final String URL_LIST = "http://news\\.163\\.com/special/\\w+/\\w+\\.html";

    public static final String URL_POST = "http://news\\.163\\.com/.+\\.html";

    /**
     * Listing pages (special pages plus the "domestic" and "shehui" channels) only enqueue
     * further links. Any other page is handled as an article: its fields are stored in the
     * page's result items and the resulting Article is indexed.
     */
    public void process(Page page) {
        boolean isListing = page.getUrl().regex(URL_LIST).match()
                || page.getUrl().regex("http://news\\.163\\.com/domestic").match()
                || page.getUrl().regex("http://news\\.163\\.com/shehui").match();

        if (isListing) {
            page.addTargetRequests(page.getHtml().links().regex(URL_POST).all());
            page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
            return;
        }

        // Article page: extract each field, then record it under the same key as before.
        String title = Utils.replaceHTML(page.getHtml().xpath("//h1[@id='h1title']").toString());
        page.putField("title", title);

        String content = Utils.replaceHTML(page.getHtml().xpath("//div[@id='endText']").toString());
        page.putField("content", content);

        String create = Utils.replaceHTML(page.getHtml().xpath("//div[@class=\"ep-time-soure cDGray\"]").toString());
        page.putField("create", create);

        String source = Utils.replaceHTML(page.getHtml().xpath("//a[@id=\"ne_article_source\"]/text()").toString());
        page.putField("source", source);

        String url = page.getUrl().get();
        page.putField("url", url);

        // This processor does not extract an author.
        String author = "";

        // Build the article and index it.
        Utils.index(Utils.createArticle(title, content, source, author, url, create));
    }

    public Site getSite() {
        return site;
    }

    /** Starts a 5-thread crawl from the two channel pages and stores results through the "jdbcPipeline" bean. */
    public static void main(String[] args) {
        ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
        JdbcPipeline pipeline = (JdbcPipeline) context.getBean("jdbcPipeline");

        Spider spider = Spider.create(new NeteaseNewsPageProcesser());
        spider.addUrl("http://news.163.com/domestic");
        spider.addUrl("http://news.163.com/shehui");
        spider.addPipeline(pipeline);
        spider.thread(5);
        spider.run();
    }
}
72 | -------------------------------------------------------------------------------- /src/main/java/net/aimeizi/webmagic/Utils.java: -------------------------------------------------------------------------------- 1 | package net.aimeizi.webmagic; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import net.aimeizi.client.elasticsearch.TransportClient; 5 | import net.aimeizi.model.Article; 6 | 7 | import java.text.SimpleDateFormat; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Created by Administrator on 2015/9/11.
13 | */
/** Helpers shared by the page processors: Article construction, indexing and HTML stripping. */
public class Utils {

    /** Finds a "yyyy-MM-dd HH:mm" timestamp inside free text; compiled once instead of per call. */
    private static final Pattern TIMESTAMP = Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

    /** Shared JSON mapper, so one is not built for every indexed article. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Builds an Article from the crawled fields.
     *
     * @param create free text containing the publication time; the first "yyyy-MM-dd HH:mm"
     *               match is parsed, otherwise the whole text is tried. When it is null or
     *               cannot be parsed the publication date is left unset and a message is
     *               written to stderr (previously such failures were swallowed silently).
     * @return the populated article
     */
    public static Article createArticle(String title, String content, String source, String author, String url, String create) {
        Article article = new Article();
        article.setTitle(title);
        article.setContent(content);
        article.setSource(source);
        article.setAuthor(author);
        if (create != null) {
            Matcher matcher = TIMESTAMP.matcher(create);
            String candidate = matcher.find() ? matcher.group(0) : create;
            try {
                // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
                article.setPubdate(new SimpleDateFormat("yyyy-MM-dd HH:mm").parse(candidate));
            } catch (java.text.ParseException e) {
                // Best effort: keep the article, just without a publication date.
                System.err.println("Utils.createArticle: unparseable publication date '" + candidate + "' for " + url);
            }
        }
        article.setUrl(url);
        return article;
    }

    /**
     * Serialises the article to JSON and hands it to
     * {@code TransportClient.createIndex("news", "article", json)}. Failures do not propagate,
     * so a crawl keeps going, but they are now reported on stderr instead of being dropped.
     *
     * @param article article to index
     */
    public static void index(Article article) {
        try {
            TransportClient.createIndex("news", "article", MAPPER.writeValueAsString(article));
        } catch (Exception e) {
            System.err.println("Utils.index: failed to index article: " + e);
        }
    }

    /**
     * Strips markup from a string: removes everything matching {@code <...>} and then every
     * occurrence of the second pattern below.
     * NOTE(review): the second pattern appears as a single space in this dump; it may have been
     * an HTML entity decoded during extraction - confirm against the repository.
     *
     * @param str text to clean, may be null
     * @return the cleaned text, or "" when {@code str} is null
     */
    public static String replaceHTML(String str) {
        return str != null ? str.replaceAll("\\<.*?>", "").replaceAll(" ", "") : "";
    }
}
61 | -------------------------------------------------------------------------------- /src/main/resources/applicationContext.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/resources/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: elasticsearch -------------------------------------------------------------------------------- /src/main/resources/jdbc.properties: -------------------------------------------------------------------------------- 1 |
jdbc.driverClassName=com.mysql.jdbc.Driver 2 | jdbc.url=jdbc:mysql://localhost:3306/news 3 | jdbc.username=root 4 | jdbc.password=root -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=ERROR, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%r %-5p %c{1}:%L - %m%n 9 | 10 | # NodeChecker gets too chatty during test phase 11 | # log4j.category.io.searchbox.client.config.discovery.NodeChecker=FATAL 12 | 13 | # Same as above, gets too chatty for regular builds 14 | log4j.category.org.elasticsearch=WARN -------------------------------------------------------------------------------- /src/main/resources/sql.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `news` DEFAULT CHARACTER SET utf8; 2 | 3 | USE `news`; 4 | 5 | DROP TABLE IF EXISTS `news`; 6 | 7 | CREATE TABLE `news` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `title` varchar(100) DEFAULT NULL, 10 | `content` mediumtext, 11 | `url` varchar(100) DEFAULT NULL, 12 | `source` varchar(50) DEFAULT NULL, 13 | `author` varchar(50) DEFAULT NULL, 14 | `pubdate` datetime DEFAULT NULL, 15 | PRIMARY KEY (`id`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=112 DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/mvc-dispatcher-servlet.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 
-------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/pages/search.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" pageEncoding="utf-8" %> 2 | <%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt" %> 3 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> 4 | 5 | 6 | 7 | 8 | elasticsearch-jest 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 43 | 44 | 45 |
46 | 139 | 182 |
183 |
184 |
185 | 186 |

没有要查询的内容

187 |
188 | 189 |
190 |

${article.title}

191 | ${article.content} 192 |

来源:${article.source} 编辑:${article.author} 发表日期:

193 |
194 |
195 | 197 |
198 |
199 |
200 |
201 |

elasticsearch jest client

202 |
203 |
204 | 205 |
206 | 回到顶部 207 |
208 | 209 | 210 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | elasticsearch-jest-example 7 | 8 | contextConfigLocation 9 | 10 | /WEB-INF/mvc-dispatcher-servlet.xml 11 | 12 | 13 | 14 | 15 | org.springframework.web.context.ContextLoaderListener 16 | 17 | 18 | CharacterEncodingFilter 19 | org.springframework.web.filter.CharacterEncodingFilter 20 | 21 | encoding 22 | UTF-8 23 | 24 | 25 | 26 | CharacterEncodingFilter 27 | /* 28 | 29 | 30 | 31 | mvc-dispatcher 32 | org.springframework.web.servlet.DispatcherServlet 33 | 34 | contextConfigLocation 35 | /WEB-INF/mvc-dispatcher-servlet.xml 36 | 37 | 1 38 | 39 | 40 | 41 | mvc-dispatcher 42 | / 43 | 44 | -------------------------------------------------------------------------------- /src/main/webapp/css/pager.css: -------------------------------------------------------------------------------- 1 | #pageNav{padding-left:386px;} 2 | #pageNav ul.pages { 3 | display:block; 4 | border:none; 5 | text-transform:uppercase; 6 | font-size:12px; 7 | margin:10px 0 50px; 8 | padding:0; 9 | } 10 | #pageNav ul.pages li { 11 | list-style:none; 12 | float:left; 13 | border:1px solid #9AAFE5; 14 | text-decoration:none; 15 | margin:0 5px 0 0; 16 | padding:5px; 17 | } 18 | #pageNav ul.pages li:hover { 19 | border:1px solid #003f7e; 20 | } 21 | #pageNav ul.pages li.pgEmpty { 22 | 23 | } 24 | #pageNav ul.pages li.pgCurrent { 25 | border:none; 26 | /*border:1px solid #003f7e;*/ 27 | color:#000; 28 | font-weight:700; 29 | /*background-color:#eee;*/ 30 | background: none repeat scroll 0 0 #2E6AB1; 31 | border: 1px solid #2E6AB1; 32 | color: #FFFFFF; 33 | font-weight: bold; 34 | } -------------------------------------------------------------------------------- /src/main/webapp/images/top.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/v5tech/elasticsearch-jest-example/041008b96f219e5b9bad4d8a429faa44aa862cab/src/main/webapp/images/top.png -------------------------------------------------------------------------------- /src/main/webapp/js/jquery.pager.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery pager plugin 3 | * Version 1.0 (12/22/2008) 4 | * @requires jQuery v1.2.6 or later 5 | * 6 | * Example at: http://jonpauldavies.github.com/JQuery/Pager/PagerDemo.html 7 | * 8 | * Copyright (c) 2008-2009 Jon Paul Davies 9 | * Dual licensed under the MIT and GPL licenses: 10 | * http://www.opensource.org/licenses/mit-license.php 11 | * http://www.gnu.org/licenses/gpl.html 12 | * 13 | * Read the related blog post and contact the author at http://www.j-dee.com/2008/12/22/jquery-pager-plugin/ 14 | * 15 | * This version is far from perfect and doesn't manage it's own state, therefore contributions are more than welcome! 16 | * 17 | * Usage: .pager({ pagenumber: 1, pagecount: 15, buttonClickCallback: PagerClickTest }); 18 | * 19 | * Where pagenumber is the visible page number 20 | * pagecount is the total number of pages to display 21 | * buttonClickCallback is the method to fire when a pager button is clicked. 22 | * 23 | * buttonClickCallback signiture is PagerClickTest = function(pageclickednumber) 24 | * Where pageclickednumber is the number of the page clicked in the control. 25 | * 26 | * The included Pager.CSS file is a dependancy but can obviously tweaked to your wishes 27 | * Tested in IE6 IE7 Firefox & Safari. Any browser strangeness, please report. 
28 | */ 29 | (function($) { 30 | 31 | $.fn.pager = function(options) { 32 | 33 | var opts = $.extend({}, $.fn.pager.defaults, options); 34 | 35 | return this.each(function() { 36 | 37 | // empty out the destination element and then render out the pager with the supplied options 38 | $(this).empty().append(renderpager(parseInt(options.pagenumber), parseInt(options.pagecount), options.buttonClickCallback)); 39 | 40 | // specify correct cursor activity 41 | $('.pages li').mouseover(function() { document.body.style.cursor = "pointer"; }).mouseout(function() { document.body.style.cursor = "auto"; }); 42 | }); 43 | }; 44 | 45 | // render and return the pager with the supplied options 46 | function renderpager(pagenumber, pagecount, buttonClickCallback) { 47 | 48 | // setup $pager to hold render 49 | var $pager = $('
    '); 50 | 51 | // add in the previous and next buttons 52 | $pager.append(renderButton('首页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('上一页', pagenumber, pagecount, buttonClickCallback)); 53 | 54 | // pager currently only handles 10 viewable pages ( could be easily parameterized, maybe in next version ) so handle edge cases 55 | var startPoint = 1; 56 | var endPoint = 9; 57 | 58 | if (pagenumber > 4) { 59 | startPoint = pagenumber - 4; 60 | endPoint = pagenumber + 4; 61 | } 62 | 63 | if (endPoint > pagecount) { 64 | startPoint = pagecount - 8; 65 | endPoint = pagecount; 66 | } 67 | 68 | if (startPoint < 1) { 69 | startPoint = 1; 70 | } 71 | 72 | // loop thru visible pages and render buttons 73 | for (var page = startPoint; page <= endPoint; page++) { 74 | 75 | var currentButton = $('
  • ' + (page) + '
  • '); 76 | 77 | page == pagenumber ? currentButton.addClass('pgCurrent') : currentButton.click(function() { buttonClickCallback(this.firstChild.data); }); 78 | currentButton.appendTo($pager); 79 | } 80 | 81 | // render in the next and last buttons before returning the whole rendered control back. 82 | $pager.append(renderButton('下一页', pagenumber, pagecount, buttonClickCallback)).append(renderButton('末页', pagenumber, pagecount, buttonClickCallback)); 83 | 84 | return $pager; 85 | } 86 | 87 | // renders and returns a 'specialized' button, ie 'next', 'previous' etc. rather than a page number button 88 | function renderButton(buttonLabel, pagenumber, pagecount, buttonClickCallback) { 89 | 90 | var $Button = $('
  • ' + buttonLabel + '
  • '); 91 | 92 | var destPage = 1; 93 | 94 | // work out destination page for required button type 95 | switch (buttonLabel) { 96 | case "首页": 97 | destPage = 1; 98 | break; 99 | case "上一页": 100 | destPage = pagenumber - 1; 101 | break; 102 | case "下一页": 103 | destPage = pagenumber + 1; 104 | break; 105 | case "末页": 106 | destPage = pagecount; 107 | break; 108 | } 109 | 110 | // disable and 'grey' out buttons if not needed. 111 | if (buttonLabel == "首页" || buttonLabel == "上一页") { 112 | pagenumber <= 1 ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 113 | } 114 | else { 115 | pagenumber >= pagecount ? $Button.addClass('pgEmpty') : $Button.click(function() { buttonClickCallback(destPage); }); 116 | } 117 | 118 | return $Button; 119 | } 120 | 121 | // pager defaults. hardly worth bothering with in this case but used as placeholder for expansion in the next version 122 | $.fn.pager.defaults = { 123 | pagenumber: 1, 124 | pagecount: 1 125 | }; 126 | 127 | })(jQuery); 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /src/main/webapp/js/top.js: -------------------------------------------------------------------------------- 1 | function goTop(acceleration, time) 2 | { 3 | acceleration = acceleration || 0.1; 4 | time = time || 16; 5 | 6 | var x1 = 0; 7 | var y1 = 0; 8 | var x2 = 0; 9 | var y2 = 0; 10 | var x3 = 0; 11 | var y3 = 0; 12 | 13 | if (document.documentElement) 14 | { 15 | x1 = document.documentElement.scrollLeft || 0; 16 | y1 = document.documentElement.scrollTop || 0; 17 | } 18 | if (document.body) 19 | { 20 | x2 = document.body.scrollLeft || 0; 21 | y2 = document.body.scrollTop || 0; 22 | } 23 | var x3 = window.scrollX || 0; 24 | var y3 = window.scrollY || 0; 25 | 26 | var x = Math.max(x1, Math.max(x2, x3)); 27 | var y = Math.max(y1, Math.max(y2, y3)); 28 | 29 | var speed = 1 + acceleration; 30 | window.scrollTo(Math.floor(x / speed), Math.floor(y / 
speed)); 31 | 32 | if(x > 0 || y > 0) 33 | { 34 | var invokeFunction = "goTop(" + acceleration + ", " + time + ")"; 35 | window.setTimeout(invokeFunction, time); 36 | } 37 | } --------------------------------------------------------------------------------