├── xultimate-ikanalyzer ├── src │ ├── test │ │ ├── resources │ │ │ ├── ext.dic │ │ │ ├── IKAnalyzer.cfg.xml │ │ │ ├── stopword.dic │ │ │ └── databases.properties │ │ └── java │ │ │ └── org │ │ │ ├── danielli │ │ │ └── xultimate │ │ │ │ └── ikanalyzer │ │ │ │ └── InitializerTest.java │ │ │ └── wltea │ │ │ └── analyzer │ │ │ └── sample │ │ │ ├── IKAnalzyerDemo.java │ │ │ └── LuceneIndexAndSearchDemo.java │ └── main │ │ ├── java │ │ └── org │ │ │ ├── danielli │ │ │ └── xultimate │ │ │ │ └── searching │ │ │ │ ├── biz │ │ │ │ ├── ExtKeywordBiz.java │ │ │ │ ├── StopKeywordBiz.java │ │ │ │ ├── SynonymKeywordBiz.java │ │ │ │ └── impl │ │ │ │ │ ├── MyBatisExtKeywordBiz.java │ │ │ │ │ ├── MyBatisStopKeywordBiz.java │ │ │ │ │ └── MyBatisSynonymKeywordBiz.java │ │ │ │ ├── service │ │ │ │ ├── ExtKeywordService.java │ │ │ │ ├── StopKeywordService.java │ │ │ │ ├── SynonymKeywordService.java │ │ │ │ └── impl │ │ │ │ │ ├── MyBatisExtKeywordService.java │ │ │ │ │ ├── MyBatisStopKeywordService.java │ │ │ │ │ └── MyBatisSynonymKeywordService.java │ │ │ │ ├── dao │ │ │ │ ├── ExtKeywordDAO.java │ │ │ │ ├── StopKeywordDAO.java │ │ │ │ └── SynonymKeywordDAO.java │ │ │ │ ├── po │ │ │ │ ├── ExtKeyword.java │ │ │ │ ├── StopKeyword.java │ │ │ │ └── SynonymKeyword.java │ │ │ │ ├── IKTokenizerFactory.java │ │ │ │ ├── ExtKeywordInitializer.java │ │ │ │ ├── StopKeywordInitializer.java │ │ │ │ ├── SolrSynonymDtabaseLoader.java │ │ │ │ └── SynonymFilterFactory.java │ │ │ └── wltea │ │ │ └── analyzer │ │ │ ├── core │ │ │ ├── ISegmenter.java │ │ │ ├── CharacterUtil.java │ │ │ ├── CJKSegmenter.java │ │ │ ├── IKArbitrator.java │ │ │ ├── IKSegmenter.java │ │ │ ├── QuickSortSet.java │ │ │ ├── CN_QuantifierSegmenter.java │ │ │ ├── LexemePath.java │ │ │ ├── Lexeme.java │ │ │ └── LetterSegmenter.java │ │ │ ├── cfg │ │ │ ├── Configuration.java │ │ │ └── DefaultConfig.java │ │ │ ├── lucene │ │ │ ├── IKAnalyzer.java │ │ │ └── IKTokenizer.java │ │ │ ├── dic │ │ │ ├── quantifier.dic │ │ │ ├── Hit.java │ │ │ └── DictSegment.java │ │ │ └── query │ │ │ └── SWMCQueryBuilder.java │ │ └── resources │ │ ├── mybatis │ │ ├── ExtKeywordDAO.xml │ │ ├── StopKeywordDAO.xml │ │ ├── SynonymKeywordDAO.xml │ │ └── mybatis-3-mapper.dtd │ │ ├── solr_db_init.sql │ │ ├── applicationContext-service-config.xml │ │ ├── applicationContext-service-generic.xml │ │ ├── applicationContext-service-crypto.xml │ │ ├── applicationContext-dao-base.xml │ │ └── applicationContext-dao-generic.xml └── pom.xml ├── .gitignore ├── xultimate-lucene ├── src │ ├── main │ │ └── java │ │ │ └── org │ │ │ └── danielli │ │ │ └── xultimate │ │ │ └── lucene │ │ │ └── util │ │ │ └── AnalyzerUtils.java │ └── test │ │ ├── java │ │ └── org │ │ │ └── danielli │ │ │ └── xultimate │ │ │ └── lucene │ │ │ ├── IdFilter.java │ │ │ ├── IdCollector.java │ │ │ ├── TestUtils.java │ │ │ ├── IdFieldComparator.java │ │ │ ├── NearRealtimeSearchTest2.java │ │ │ ├── NearRealtimeSearchTest1.java │ │ │ └── NearRealtimeSearchTest3.java │ │ └── resources │ │ └── org │ │ └── danielli │ │ └── xultimate │ │ └── lucene │ │ ├── applicationContext-service-lucene1.xml │ │ ├── applicationContext-service-lucene2.xml │ │ └── applicationContext-service-lucene3.xml └── pom.xml ├── xultimate-solr ├── pom.xml └── src │ └── test │ ├── resources │ └── applicationContext-service-solr-client.xml │ └── java │ └── org │ └── danielli │ └── xultimate │ └── solr │ └── SolrServerTest.java ├── pom.xml └── README.md /xultimate-ikanalyzer/src/test/resources/ext.dic: 
-------------------------------------------------------------------------------- 1 | 诛仙 2 | 诛仙2 3 | 梦幻诛仙 4 | 梦幻诛仙2 5 | 李天朋 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .settings 2 | .project 3 | .classpath 4 | 5 | *.class 6 | 7 | # Package Files # 8 | *.jar 9 | *.war 10 | *.ear 11 | 12 | target 13 | .springBeans -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/biz/ExtKeywordBiz.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.biz; 2 | 3 | import java.util.List; 4 | 5 | public interface ExtKeywordBiz { 6 | 7 | List<String> find(Integer pageNo, Integer pageSize); 8 | } 9 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/biz/StopKeywordBiz.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.biz; 2 | 3 | import java.util.List; 4 | 5 | public interface StopKeywordBiz { 6 | 7 | List<String> find(Integer pageNo, Integer pageSize); 8 | } 9 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/service/ExtKeywordService.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.service; 2 | 3 | import java.util.List; 4 | 5 | public interface ExtKeywordService { 6 | 7 | List<String> find(Integer pageNo, Integer pageSize); 8 | } 9 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/service/StopKeywordService.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.service; 2 | 3 | import java.util.List; 4 | 5 | public interface StopKeywordService { 6 | 7 | List<String> find(Integer pageNo, Integer pageSize); 8 | } 9 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/biz/SynonymKeywordBiz.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.biz; 2 | 3 | import java.util.List; 4 | 5 | import org.danielli.xultimate.searching.po.SynonymKeyword; 6 | 7 | public interface SynonymKeywordBiz { 8 | 9 | List<SynonymKeyword> find(Integer pageNo, Integer pageSize); 10 | } 11 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/dao/ExtKeywordDAO.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.dao; 2 | 3 | import java.util.List; 4 | 5 | import org.danielli.xultimate.orm.mybatis.MyBatisRepository; 6 | 7 | @MyBatisRepository 8 | public interface ExtKeywordDAO { 9 | 10 | List<String> find(Integer offset, Integer rows); 11 | } 12 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/dao/StopKeywordDAO.java: -------------------------------------------------------------------------------- 1 |
package org.danielli.xultimate.searching.dao; 2 | 3 | import java.util.List; 4 | 5 | import org.danielli.xultimate.orm.mybatis.MyBatisRepository; 6 | 7 | @MyBatisRepository 8 | public interface StopKeywordDAO { 9 | 10 | List<String> find(Integer offset, Integer rows); 11 | } 12 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/service/SynonymKeywordService.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.service; 2 | 3 | import java.util.List; 4 | 5 | import org.danielli.xultimate.searching.po.SynonymKeyword; 6 | 7 | public interface SynonymKeywordService { 8 | 9 | List<SynonymKeyword> find(Integer pageNo, Integer pageSize); 10 | } 11 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/test/resources/IKAnalyzer.cfg.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd"> 3 | <properties> 4 | <comment>IK Analyzer extension configuration</comment> 5 | <!-- configure your own extension dictionaries here --> 6 | <entry key="ext_dict">ext.dic;</entry> 7 | 8 | <!-- configure your own extension stopword dictionaries here --> 9 | <entry key="ext_stopwords">stopword.dic;</entry> 10 | </properties> 11 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/dao/SynonymKeywordDAO.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.dao; 2 | 3 | import java.util.List; 4 | 5 | import org.danielli.xultimate.orm.mybatis.MyBatisRepository; 6 | import org.danielli.xultimate.searching.po.SynonymKeyword; 7 | 8 | @MyBatisRepository 9 | public interface SynonymKeywordDAO { 10 | 11 | List<SynonymKeyword> find(Integer offset, Integer rows); 12 | } 13 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/test/resources/stopword.dic: -------------------------------------------------------------------------------- 1 | a 2 | an 3 | and 4 | are 5 | as 6 | at 7 | be 8 | but 9 | by 10 | for 11 | if 12 | in 13 | into 14 | is 15 | it 16 | no 17 | not 18 | of 19 | on 20 | or 21 | such 22 | that 23 | the 24 | their 25 | then 26 | there 27 | these 28 | they 29 | this 30 | to 31 | was 32 | will 33 | with 34 | 也 35 | 了 36 | 仍 37 | 从 38 | 以 39 | 使 40 | 则 41 | 却 42 | 又 43 | 及 44 | 对 45 | 就 46 | 并 47 | 很 48 | 或 49 | 把 50 | 是 51 | 的 52 | 着 53 | 给 54 | 而 55 | 被 56 | 让 57 | 在 58 | 还 59 | 比 60 | 等 61 | 当 62 | 与 63 | 于 64 | 但 -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/resources/mybatis/ExtKeywordDAO.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/resources/mybatis/StopKeywordDAO.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/service/impl/MyBatisExtKeywordService.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.service.impl; 2 | 3 | import java.util.List; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.danielli.xultimate.searching.biz.ExtKeywordBiz; 8 | import org.danielli.xultimate.searching.service.ExtKeywordService; 9 | import
org.springframework.stereotype.Service; 10 | 11 | @Service("myBatisExtKeywordService") 12 | public class MyBatisExtKeywordService implements ExtKeywordService { 13 | 14 | @Resource(name = "myBatisExtKeywordBiz") 15 | private ExtKeywordBiz extKeywordBiz; 16 | 17 | @Override 18 | public List<String> find(Integer pageNo, Integer pageSize) { 19 | return extKeywordBiz.find(pageNo, pageSize); 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/service/impl/MyBatisStopKeywordService.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.service.impl; 2 | 3 | import java.util.List; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.danielli.xultimate.searching.biz.StopKeywordBiz; 8 | import org.danielli.xultimate.searching.service.StopKeywordService; 9 | import org.springframework.stereotype.Service; 10 | 11 | @Service("myBatisStopKeywordService") 12 | public class MyBatisStopKeywordService implements StopKeywordService { 13 | 14 | @Resource(name = "myBatisStopKeywordBiz") 15 | private StopKeywordBiz stopKeywordBiz; 16 | 17 | @Override 18 | public List<String> find(Integer pageNo, Integer pageSize) { 19 | return stopKeywordBiz.find(pageNo, pageSize); 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/test/resources/databases.properties: -------------------------------------------------------------------------------- 1 | #------------ MySQL ------------ 2 | datasource.solrDb.jdbc.driver=com.mysql.jdbc.Driver 3 | datasource.solrDb.jdbc.url=jdbc:mysql://127.0.0.1:3306/orm_db?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull 4 | datasource.solrDb.jdbc.username=64c15dd4a9f5523d71dde127e8e8db96 5 | datasource.solrDb.jdbc.password=64c15dd4a9f5523d71dde127e8e8db96 6 | datasource.solrDb.pool.maxActive=20 7 | datasource.solrDb.pool.minIdle=10 8 | 9 | #------------ Oracle ------------ 10 | #datasource.solrDb.jdbc.driver=oracle.jdbc.driver.OracleDriver 11 | #datasource.solrDb.jdbc.url=jdbc:oracle:thin:@localhost:1521:orm_db 12 | #datasource.solrDb.jdbc.username=64c15dd4a9f5523d71dde127e8e8db96 13 | #datasource.solrDb.jdbc.password=64c15dd4a9f5523d71dde127e8e8db96 14 | #datasource.solrDb.pool.maxActive=20 15 | #datasource.solrDb.pool.minIdle=10 -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/service/impl/MyBatisSynonymKeywordService.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.service.impl; 2 | 3 | import java.util.List; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.danielli.xultimate.searching.biz.SynonymKeywordBiz; 8 | import org.danielli.xultimate.searching.po.SynonymKeyword; 9 | import org.danielli.xultimate.searching.service.SynonymKeywordService; 10 | import org.springframework.stereotype.Service; 11 | 12 | @Service("myBatisSynonymKeywordService") 13 | public class MyBatisSynonymKeywordService implements SynonymKeywordService { 14 | 15 | @Resource(name = "myBatisSynonymKeywordBiz") 16 | private SynonymKeywordBiz synonymKeywordBiz; 17 | 18 | @Override 19 | public List<SynonymKeyword> find(Integer pageNo, Integer pageSize) { 20 | return synonymKeywordBiz.find(pageNo, pageSize); 21 | } 22 | 23 | } 24 |
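The MyBatis mapper XMLs in this module (ExtKeywordDAO.xml and StopKeywordDAO.xml above, SynonymKeywordDAO.xml below) lost their markup in this dump; only empty line markers survive. As a rough guide to the mapping they perform, here is a hedged annotation-style sketch of the synonym mapper, assuming the solr_synonym_keywords table from solr_db_init.sql and MySQL LIMIT paging; the interface name, column aliases, and @Param names are illustrative assumptions, not recovered code:

```java
package org.danielli.xultimate.searching.dao;

import java.util.List;

import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.danielli.xultimate.searching.po.SynonymKeyword;

// Hypothetical annotation-based equivalent of the stripped SynonymKeywordDAO.xml body.
public interface SynonymKeywordDAOSketch {

	// Maps the snake_case columns from solr_db_init.sql onto the SynonymKeyword bean properties.
	@Select("SELECT id, keyword, synonym_keyword AS synonymKeyword, "
			+ "create_time AS createTime, update_time AS updateTime "
			+ "FROM solr_synonym_keywords LIMIT #{offset}, #{rows}")
	List<SynonymKeyword> find(@Param("offset") Integer offset, @Param("rows") Integer rows);
}
```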
-------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/resources/mybatis/SynonymKeywordDAO.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /xultimate-lucene/src/main/java/org/danielli/xultimate/lucene/util/AnalyzerUtils.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.lucene.util; 2 | 3 | import java.io.IOException; 4 | import java.io.Reader; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.lucene.analysis.Analyzer; 9 | import org.apache.lucene.analysis.TokenStream; 10 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 11 | 12 | public class AnalyzerUtils { 13 | 14 | public static List<String> tokenStream(Analyzer analyzer, Reader analyzerReader) throws IOException { 15 | List<String> result = new ArrayList<String>(); 16 | TokenStream tokenStream = analyzer.tokenStream(null, analyzerReader); 17 | CharTermAttribute attribute = tokenStream.getAttribute(CharTermAttribute.class); 18 | tokenStream.reset(); // required before incrementToken() as of Lucene 4 19 | while (tokenStream.incrementToken()) { 20 | result.add(attribute.toString()); 21 | } 22 | tokenStream.end(); 23 | tokenStream.close(); 24 | return result; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/biz/impl/MyBatisExtKeywordBiz.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.biz.impl; 2 | 3 | import java.util.List; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.danielli.xultimate.searching.biz.ExtKeywordBiz; 8 | import org.danielli.xultimate.searching.dao.ExtKeywordDAO; 9 | import org.danielli.xultimate.util.math.NumberUtils; 10 | import org.springframework.stereotype.Service; 11 | 12 | @Service("myBatisExtKeywordBiz") 13 | public class MyBatisExtKeywordBiz implements ExtKeywordBiz { 14 | 15 | @Resource(name = "extKeywordDAO") 16 | private ExtKeywordDAO extKeywordDAO; 17 | 18 | @Override 19 | public List<String> find(Integer pageNo, Integer pageSize) { 20 | if (!NumberUtils.isPositiveNumber(pageNo)) { 21 | pageNo = 1; 22 | } 23 | Integer offset = (pageNo - 1) * pageSize; 24 | return extKeywordDAO.find(offset, pageSize); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/biz/impl/MyBatisStopKeywordBiz.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.biz.impl; 2 | 3 | import java.util.List; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.danielli.xultimate.searching.biz.StopKeywordBiz; 8 | import org.danielli.xultimate.searching.dao.StopKeywordDAO; 9 | import org.danielli.xultimate.util.math.NumberUtils; 10 | import org.springframework.stereotype.Service; 11 | 12 | @Service("myBatisStopKeywordBiz") 13 | public class MyBatisStopKeywordBiz implements StopKeywordBiz { 14 | 15 | @Resource(name = "stopKeywordDAO") 16 | private StopKeywordDAO stopKeywordDAO; 17 | 18 | @Override 19 | public List<String> find(Integer pageNo, Integer pageSize) { 20 | if (!NumberUtils.isPositiveNumber(pageNo)) { 21 | pageNo = 1; 22 | } 23 | Integer offset = (pageNo - 1) * pageSize; 24 | return stopKeywordDAO.find(offset, pageSize); 25 | } 26 |
27 | } 28 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/po/ExtKeyword.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.po; 2 | 3 | import java.util.Date; 4 | 5 | public class ExtKeyword { 6 | 7 | private Long id; 8 | 9 | private String keyword; 10 | 11 | private Date createTime; 12 | 13 | private Date updateTime; 14 | 15 | public Long getId() { 16 | return id; 17 | } 18 | 19 | public void setId(Long id) { 20 | this.id = id; 21 | } 22 | 23 | public String getKeyword() { 24 | return keyword; 25 | } 26 | 27 | public void setKeyword(String keyword) { 28 | this.keyword = keyword; 29 | } 30 | 31 | public Date getCreateTime() { 32 | return createTime; 33 | } 34 | 35 | public void setCreateTime(Date createTime) { 36 | this.createTime = createTime; 37 | } 38 | 39 | public Date getUpdateTime() { 40 | return updateTime; 41 | } 42 | 43 | public void setUpdateTime(Date updateTime) { 44 | this.updateTime = updateTime; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/po/StopKeyword.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.po; 2 | 3 | import java.util.Date; 4 | 5 | public class StopKeyword { 6 | 7 | private Long id; 8 | 9 | private String keyword; 10 | 11 | private Date createTime; 12 | 13 | private Date updateTime; 14 | 15 | public Long getId() { 16 | return id; 17 | } 18 | 19 | public void setId(Long id) { 20 | this.id = id; 21 | } 22 | 23 | public String getKeyword() { 24 | return keyword; 25 | } 26 | 27 | public void setKeyword(String keyword) { 28 | this.keyword = keyword; 29 | } 30 | 31 | public Date getCreateTime() { 32 | return createTime; 33 | } 34 | 35 | public void setCreateTime(Date createTime) { 36 | this.createTime = createTime; 37 | } 38 | 39 | public Date getUpdateTime() { 40 | return updateTime; 41 | } 42 | 43 | public void setUpdateTime(Date updateTime) { 44 | this.updateTime = updateTime; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/biz/impl/MyBatisSynonymKeywordBiz.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.biz.impl; 2 | 3 | import java.util.List; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.danielli.xultimate.searching.biz.SynonymKeywordBiz; 8 | import org.danielli.xultimate.searching.dao.SynonymKeywordDAO; 9 | import org.danielli.xultimate.searching.po.SynonymKeyword; 10 | import org.danielli.xultimate.util.math.NumberUtils; 11 | import org.springframework.stereotype.Service; 12 | 13 | @Service("myBatisSynonymKeywordBiz") 14 | public class MyBatisSynonymKeywordBiz implements SynonymKeywordBiz { 15 | 16 | @Resource(name = "synonymKeywordDAO") 17 | private SynonymKeywordDAO synonymKeywordDAO; 18 | 19 | @Override 20 | public List<SynonymKeyword> find(Integer pageNo, Integer pageSize) { 21 | if (!NumberUtils.isPositiveNumber(pageNo)) { 22 | pageNo = 1; 23 | } 24 | Integer offset = (pageNo - 1) * pageSize; 25 | return synonymKeywordDAO.find(offset, pageSize); 26 | } 27 | 28 | } 29 | --------------------------------------------------------------------------------
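The IdFilter test helper that follows restricts a Lucene search to a fixed set of document ids by marking the matching documents in a per-segment OpenBitSet. A minimal usage sketch against the Lucene 4.x API; the class name, the in-memory index setup, and the Version.LUCENE_44 constant are assumptions for illustration, not repo code:

```java
package org.danielli.xultimate.lucene;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class IdFilterUsageSketch {

	public static void main(String[] args) throws Exception {
		// Index five documents whose "id" field matches what IdFilter looks up:
		// an untokenized StringField, exactly as TestUtils.createDocument does.
		Directory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory,
				new IndexWriterConfig(Version.LUCENE_44, new StandardAnalyzer(Version.LUCENE_44)));
		for (long id = 0; id < 5; id++) {
			Document document = new Document();
			document.add(new StringField("id", String.valueOf(id), Store.YES));
			writer.addDocument(document);
		}
		writer.close();

		// Restrict a match-all query to the documents whose id is 1 or 3.
		DirectoryReader reader = DirectoryReader.open(directory);
		IndexSearcher searcher = new IndexSearcher(reader);
		TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), new IdFilter(new Long[] { 1L, 3L }), 10);
		for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
			System.out.println(searcher.doc(scoreDoc.doc).get("id")); // prints 1 and 3
		}
		reader.close();
	}
}
```

TestUtils.getIdList later in this section combines the same filter with the IdCollector to pull back matching ids in bulk.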
/xultimate-lucene/src/test/java/org/danielli/xultimate/lucene/IdFilter.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.lucene; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.lucene.index.AtomicReaderContext; 6 | import org.apache.lucene.index.DocsEnum; 7 | import org.apache.lucene.index.Term; 8 | import org.apache.lucene.search.DocIdSet; 9 | import org.apache.lucene.search.DocIdSetIterator; 10 | import org.apache.lucene.search.Filter; 11 | import org.apache.lucene.util.Bits; 12 | import org.apache.lucene.util.OpenBitSet; 13 | 14 | public class IdFilter extends Filter { 15 | 16 | private Long[] existIds; 17 | 18 | public IdFilter(Long[] existIds) { 19 | this.existIds = existIds; 20 | } 21 | 22 | @Override 23 | public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { 24 | OpenBitSet bits = new OpenBitSet(context.reader().maxDoc()); 25 | for (Long id : existIds) { 26 | DocsEnum docsEnum = context.reader().termDocsEnum(new Term("id", String.valueOf(id))); 27 | // nextDoc() signals exhaustion with NO_MORE_DOCS (Integer.MAX_VALUE), never -1. 28 | if (docsEnum != null && docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { 29 | bits.set(docsEnum.docID()); 30 | } 31 | } 32 | return bits; 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/IKTokenizerFactory.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching; 2 | 3 | import java.io.Reader; 4 | import java.util.Map; 5 | 6 | import org.apache.lucene.analysis.Tokenizer; 7 | import org.apache.lucene.analysis.util.TokenizerFactory; 8 | import org.apache.lucene.util.AttributeSource.AttributeFactory; 9 | import org.wltea.analyzer.lucene.IKTokenizer; 10 | 11 | 12 | public class IKTokenizerFactory extends TokenizerFactory { 13 | 14 | public static final boolean DEFAULT_USE_SMART = false; 15 | 16 | private boolean useSmart; 17 | 18 | public boolean useSmart() { 19 | return useSmart; 20 | } 21 | 22 | public void setUseSmart(boolean useSmart) { 23 | this.useSmart = useSmart; 24 | } 25 | 26 | public IKTokenizerFactory(Map<String, String> args) { 27 | super(args); 28 | assureMatchVersion(); 29 | useSmart = getBoolean(args, "useSmart", DEFAULT_USE_SMART); 30 | } 31 | 32 | @Override 33 | public Tokenizer create(AttributeFactory factory, Reader input) { 34 | Tokenizer _IKTokenizer = new IKTokenizer(input, this.useSmart()); 35 | return _IKTokenizer; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/po/SynonymKeyword.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching.po; 2 | 3 | import java.util.Date; 4 | 5 | public class SynonymKeyword { 6 | 7 | private Long id; 8 | 9 | private String keyword; 10 | 11 | private String synonymKeyword; 12 | 13 | private Date createTime; 14 | 15 | private Date updateTime; 16 | 17 | public Long getId() { 18 | return id; 19 | } 20 | 21 | public void setId(Long id) { 22 | this.id = id; 23 | } 24 | 25 | public String getKeyword() { 26 | return keyword; 27 | } 28 | 29 | public void setKeyword(String keyword) { 30 | this.keyword = keyword; 31 | } 32 | 33 | public String getSynonymKeyword() { 34 | return synonymKeyword; 35 | } 36 | 37 | public void setSynonymKeyword(String synonymKeyword) { 38 | this.synonymKeyword = synonymKeyword; 39 | } 40 | 41 | public Date getCreateTime() { 42 | return
createTime; 43 | } 44 | 45 | public void setCreateTime(Date createTime) { 46 | this.createTime = createTime; 47 | } 48 | 49 | public Date getUpdateTime() { 50 | return updateTime; 51 | } 52 | 53 | public void setUpdateTime(Date updateTime) { 54 | this.updateTime = updateTime; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /xultimate-lucene/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.danielli.xultimate 8 | xultimate-searching 9 | 1.0.0-SNAPSHOT 10 | 11 | 12 | xultimate-lucene 13 | 14 | The X-Ultimate Searching Lucene 15 | The X-Ultimate Searching lucene project. 16 | https://github.com/daniellitoc/xultimate-searching/tree/master/xultimate-lucene 17 | 18 | 19 | 20 | ${project.groupId} 21 | xultimate-ikanalyzer 22 | ${project.version} 23 | 24 | 25 | junit 26 | junit 27 | 28 | 29 | org.springframework 30 | spring-test 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/resources/solr_db_init.sql: -------------------------------------------------------------------------------- 1 | create table IF NOT EXISTS `solr_ext_keywords` ( 2 | `id` bigint NOT NULL, 3 | `keyword` varchar(100) NOT NULL, 4 | `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, 5 | `update_time` timestamp NOT NULL, 6 | PRIMARY KEY (`id`) 7 | ) ENGINE = MyISAM; 8 | 9 | insert IGNORE into `solr_ext_keywords` values (1, '李天棚', '1990-04-17 00:00:01', '1990-04-17 00:00:01'); 10 | 11 | create table IF NOT EXISTS `solr_stop_keywords` ( 12 | `id` bigint NOT NULL, 13 | `keyword` varchar(100) NOT NULL, 14 | `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, 15 | `update_time` timestamp NOT NULL, 16 | PRIMARY KEY (`id`) 17 | ) ENGINE = MyISAM; 18 | 19 | insert IGNORE into `solr_stop_keywords` values (1, '的', '1990-04-17 00:00:01', '1990-04-17 00:00:01'); 20 | 21 | create table IF NOT EXISTS `solr_synonym_keywords` ( 22 | `id` bigint(20) NOT NULL, 23 | `keyword` varchar(100) NOT NULL, 24 | `synonym_keyword` varchar(100) NOT NULL, 25 | `create_time` timestamp NOT NULL, 26 | `update_time` timestamp NOT NULL, 27 | PRIMARY KEY (`id`) 28 | ) ENGINE = MyISAM; 29 | 30 | insert IGNORE into `solr_synonym_keywords` values (1, 'Daniel Li', '李天棚', '1990-04-17 00:00:01', '1990-04-17 00:00:01'); 31 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/ExtKeywordInitializer.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching; 2 | 3 | import java.util.List; 4 | 5 | import javax.annotation.PostConstruct; 6 | import javax.annotation.Resource; 7 | 8 | import org.danielli.xultimate.searching.service.ExtKeywordService; 9 | import org.danielli.xultimate.util.collections.CollectionUtils; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | import org.springframework.context.annotation.Lazy; 13 | import org.springframework.stereotype.Service; 14 | import org.wltea.analyzer.cfg.DefaultConfig; 15 | import org.wltea.analyzer.dic.Dictionary; 16 | 17 | @Service("extKeywordInitializer") 18 | @Lazy(false) 19 | public class ExtKeywordInitializer { 20 | 21 | private static final Logger LOGGER = LoggerFactory.getLogger(ExtKeywordInitializer.class); 22 | 23 | @Resource(name = "myBatisExtKeywordService") 24 | private ExtKeywordService extKeywordService; 25 | 
26 | @PostConstruct 27 | public void init() { 28 | LOGGER.info("Loading the extension word dictionary from the database"); 29 | Dictionary.initial(DefaultConfig.getInstance()); 30 | for (int pageNo = 1; ; pageNo++) { 31 | List<String> extKeywordList = extKeywordService.find(pageNo, 10000); 32 | if (CollectionUtils.isEmpty(extKeywordList)) { 33 | break; 34 | } 35 | Dictionary.getSingleton().addWords(extKeywordList); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/StopKeywordInitializer.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching; 2 | 3 | import java.util.List; 4 | 5 | import javax.annotation.PostConstruct; 6 | import javax.annotation.Resource; 7 | 8 | import org.danielli.xultimate.searching.service.StopKeywordService; 9 | import org.danielli.xultimate.util.collections.CollectionUtils; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | import org.springframework.context.annotation.Lazy; 13 | import org.springframework.stereotype.Service; 14 | import org.wltea.analyzer.cfg.DefaultConfig; 15 | import org.wltea.analyzer.dic.Dictionary; 16 | 17 | @Service("stopKeywordInitializer") 18 | @Lazy(false) 19 | public class StopKeywordInitializer { 20 | 21 | private static final Logger LOGGER = LoggerFactory.getLogger(StopKeywordInitializer.class); 22 | 23 | @Resource(name = "myBatisStopKeywordService") 24 | private StopKeywordService stopKeywordService; 25 | 26 | @PostConstruct 27 | public void init() { 28 | LOGGER.info("Loading the stopword dictionary from the database"); 29 | Dictionary.initial(DefaultConfig.getInstance()); 30 | for (int pageNo = 1; ; pageNo++) { 31 | List<String> stopKeywordList = stopKeywordService.find(pageNo, 10000); 32 | if (CollectionUtils.isEmpty(stopKeywordList)) { 33 | break; 34 | } 35 | Dictionary.getSingleton().addStopWords(stopKeywordList); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /xultimate-lucene/src/test/java/org/danielli/xultimate/lucene/IdCollector.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.lucene; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.LinkedHashSet; 6 | import java.util.List; 7 | import java.util.Set; 8 | 9 | import org.apache.lucene.index.AtomicReaderContext; 10 | import org.apache.lucene.search.Collector; 11 | import org.apache.lucene.search.FieldCache; 12 | import org.apache.lucene.search.FieldCache.Longs; 13 | import org.apache.lucene.search.Scorer; 14 | 15 | public class IdCollector extends Collector { 16 | 17 | @SuppressWarnings("unused") 18 | private Scorer scorer; 19 | private Longs currentValues; 20 | private Set<Long> result = new LinkedHashSet<Long>(); 21 | 22 | @Override 23 | public void setScorer(Scorer scorer) throws IOException { 24 | this.scorer = scorer; 25 | } 26 | 27 | @Override 28 | public void setNextReader(AtomicReaderContext context) throws IOException { 29 | this.currentValues = FieldCache.DEFAULT.getLongs(context.reader(), "id", false); 30 | } 31 | 32 | @Override 33 | public void collect(int doc) throws IOException { 34 | Long userId = this.currentValues.get(doc); 35 | result.add(userId); 36 | } 37 | 38 | public Integer getMatchUserCount() { 39 | return result.size(); 40 | } 41 | 42 | public List<Long> getResult() { 43 | return new ArrayList<Long>(result); 44 | } 45 | 46 | @Override 47 | public boolean acceptsDocsOutOfOrder() { 48 |
return false; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/ISegmenter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0 3 | * IK Analyzer release 5.0 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | */ 25 | package org.wltea.analyzer.core; 26 | 27 | 28 | /** 29 | * 30 | * 子分词器接口 31 | */ 32 | interface ISegmenter { 33 | 34 | /** 35 | * 从分析器读取下一个可能分解的词元对象 36 | * @param context 分词算法上下文 37 | */ 38 | void analyze(AnalyzeContext context); 39 | 40 | 41 | /** 42 | * 重置子分析器状态 43 | */ 44 | void reset(); 45 | 46 | } 47 | -------------------------------------------------------------------------------- /xultimate-solr/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.danielli.xultimate 8 | xultimate-searching 9 | 1.0.0-SNAPSHOT 10 | 11 | 12 | xultimate-solr 13 | 14 | The X-Ultimate Toolkit Solr 15 | The X-Ultimate Searching solr project. 16 | https://github.com/daniellitoc/xultimate-searching/tree/master/xultimate-solr 17 | 18 | 19 | 20 | ${project.groupId} 21 | xultimate-core 22 | ${project.version} 23 | 24 | 25 | ${project.groupId} 26 | xultimate-context 27 | ${project.version} 28 | 29 | 30 | ${project.groupId} 31 | xultimate-web 32 | ${project.version} 33 | 34 | 35 | org.apache.solr 36 | solr-solrj 37 | 38 | 39 | junit 40 | junit 41 | 42 | 43 | org.springframework 44 | spring-test 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/cfg/Configuration.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0 3 | * IK Analyzer release 5.0 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | */ 25 | package org.wltea.analyzer.cfg; 26 | 27 | import java.util.List; 28 | 29 | /** 30 | * 31 | * 配置管理类接口 32 | * 33 | */ 34 | public interface Configuration { 35 | 36 | 37 | 38 | /** 39 | * 返回useSmart标志位 40 | * useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分 41 | * @return useSmart 42 | */ 43 | public boolean useSmart(); 44 | 45 | /** 46 | * 设置useSmart标志位 47 | * useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分 48 | * @param useSmart 49 | */ 50 | public void setUseSmart(boolean useSmart); 51 | 52 | 53 | /** 54 | * 获取主词典路径 55 | * 56 | * @return String 主词典路径 57 | */ 58 | public String getMainDictionary(); 59 | 60 | /** 61 | * 获取量词词典路径 62 | * @return String 量词词典路径 63 | */ 64 | public String getQuantifierDicionary(); 65 | 66 | /** 67 | * 获取扩展字典配置路径 68 | * @return List 相对类加载器的路径 69 | */ 70 | public List getExtDictionarys(); 71 | 72 | 73 | /** 74 | * 获取扩展停止词典配置路径 75 | * @return List 相对类加载器的路径 76 | */ 77 | public List getExtStopWordDictionarys(); 78 | 79 | } 80 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/lucene/IKAnalyzer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0.1 3 | * IK Analyzer release 5.0.1 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | */ 25 | package org.wltea.analyzer.lucene; 26 | 27 | import java.io.Reader; 28 | 29 | import org.apache.lucene.analysis.Analyzer; 30 | import org.apache.lucene.analysis.Tokenizer; 31 | 32 | /** 33 | * IK分词器,Lucene Analyzer接口实现 34 | * 兼容Lucene 4.0版本 35 | */ 36 | public final class IKAnalyzer extends Analyzer{ 37 | 38 | private boolean useSmart; 39 | 40 | public boolean useSmart() { 41 | return useSmart; 42 | } 43 | 44 | public void setUseSmart(boolean useSmart) { 45 | this.useSmart = useSmart; 46 | } 47 | 48 | /** 49 | * IK分词器Lucene Analyzer接口实现类 50 | * 51 | * 默认细粒度切分算法 52 | */ 53 | public IKAnalyzer(){ 54 | this(false); 55 | } 56 | 57 | /** 58 | * IK分词器Lucene Analyzer接口实现类 59 | * 60 | * @param useSmart 当为true时,分词器进行智能切分 61 | */ 62 | public IKAnalyzer(boolean useSmart){ 63 | super(); 64 | this.useSmart = useSmart; 65 | } 66 | 67 | /** 68 | * 重载Analyzer接口,构造分词组件 69 | */ 70 | @Override 71 | protected TokenStreamComponents createComponents(String fieldName, final Reader in) { 72 | Tokenizer _IKTokenizer = new IKTokenizer(in , this.useSmart()); 73 | return new TokenStreamComponents(_IKTokenizer); 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/test/java/org/danielli/xultimate/ikanalyzer/InitializerTest.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.ikanalyzer; 2 | 3 | import java.io.IOException; 4 | import java.io.StringReader; 5 | 6 | import org.apache.lucene.analysis.Analyzer; 7 | import org.apache.lucene.analysis.TokenStream; 8 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 9 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; 10 | import org.apache.lucene.analysis.tokenattributes.TypeAttribute; 11 | import org.junit.Test; 12 | import org.junit.runner.RunWith; 13 | import org.springframework.test.context.ContextConfiguration; 14 | import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; 15 | import org.wltea.analyzer.lucene.IKAnalyzer; 16 | 17 | 18 | @RunWith(SpringJUnit4ClassRunner.class) 19 | @ContextConfiguration(locations = { "classpath:applicationContext-service-config.xml", "classpath:applicationContext-service-crypto.xml", "classpath:applicationContext-dao-base.xml", "classpath:applicationContext-dao-generic.xml", "classpath:applicationContext-service-generic.xml" }) 20 | public class InitializerTest { 21 | 22 | @Test 23 | public void test() { 24 | //构建IK分词器,使用smart分词模式 25 | Analyzer analyzer = new IKAnalyzer(true); 26 | 27 | //获取Lucene的TokenStream对象 28 | TokenStream ts = null; 29 | try { 30 | ts = analyzer.tokenStream("myfield", new StringReader("李天棚的测试")); 31 | //获取词元位置属性 32 | OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); 33 | //获取词元文本属性 34 | CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); 35 | //获取词元文本属性 36 | TypeAttribute type = ts.addAttribute(TypeAttribute.class); 37 | 38 | 39 | //重置TokenStream(重置StringReader) 40 | ts.reset(); 41 | //迭代获取分词结果 42 | while (ts.incrementToken()) { 43 | System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); 44 | } 45 | //关闭TokenStream(关闭StringReader) 46 | ts.end(); // Perform end-of-stream operations, e.g. set the final offset. 
47 | 48 | } catch (IOException e) { 49 | e.printStackTrace(); 50 | } finally { 51 | //释放TokenStream的所有资源 52 | if(ts != null){ 53 | try { 54 | ts.close(); 55 | } catch (IOException e) { 56 | e.printStackTrace(); 57 | } 58 | } 59 | } 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.danielli.xultimate 7 | xultimate-searching 8 | 1.0.0-SNAPSHOT 9 | 10 | 11 | xultimate-ikanalyzer 12 | 13 | The X-Ultimate Searching IKAnalyzer 14 | The X-Ultimate Searching IKAnalyzer project. 15 | https://github.com/daniellitoc/xultimate-searching/tree/master/xultimate-ikanalyzer 16 | 17 | 18 | 19 | ${project.groupId} 20 | xultimate-core 21 | ${project.version} 22 | 23 | 24 | ${project.groupId} 25 | xultimate-context 26 | ${project.version} 27 | 28 | 29 | ${project.groupId} 30 | xultimate-jdbc 31 | ${project.version} 32 | 33 | 34 | ${project.groupId} 35 | xultimate-web 36 | ${project.version} 37 | 38 | 39 | ${project.groupId} 40 | xultimate-context-support 41 | ${project.version} 42 | 43 | 44 | ${project.groupId} 45 | xultimate-mybatis 46 | ${project.version} 47 | 48 | 49 | org.apache.lucene 50 | lucene-core 51 | 52 | 53 | org.apache.lucene 54 | lucene-queryparser 55 | 56 | 57 | org.apache.lucene 58 | lucene-analyzers-common 59 | 60 | 61 | junit 62 | junit 63 | 64 | 65 | org.springframework 66 | spring-test 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/resources/applicationContext-service-config.xml: -------------------------------------------------------------------------------- 1 | 2 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/SolrSynonymDtabaseLoader.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.BufferedWriter; 5 | import java.io.ByteArrayInputStream; 6 | import java.io.ByteArrayOutputStream; 7 | import java.io.IOException; 8 | import java.io.InputStreamReader; 9 | import java.io.OutputStreamWriter; 10 | import java.io.Writer; 11 | import java.text.ParseException; 12 | import java.util.List; 13 | import java.util.Scanner; 14 | 15 | import org.apache.lucene.analysis.synonym.SolrSynonymParser; 16 | import org.danielli.xultimate.context.util.ApplicationContextUtils; 17 | import org.danielli.xultimate.context.util.BeanFactoryContext; 18 | import org.danielli.xultimate.searching.po.SynonymKeyword; 19 | import org.danielli.xultimate.searching.service.SynonymKeywordService; 20 | import org.danielli.xultimate.util.collections.CollectionUtils; 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | import com.alibaba.fastjson.util.IOUtils; 25 | 26 | public class SolrSynonymDtabaseLoader { 27 | 28 | private static final Logger LOGGER = LoggerFactory.getLogger(SolrSynonymDtabaseLoader.class); 29 | 30 | public void handle(SolrSynonymParser synonymParser) throws ParseException, IOException { 31 | SynonymKeywordService synonymKeywordService = ApplicationContextUtils.getBean(BeanFactoryContext.currentApplicationContext(), SynonymKeywordService.class); 32 | LOGGER.info("开始加载相近词词库从数据库"); 33 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 34 
| Writer writer = new BufferedWriter(new OutputStreamWriter(outputStream)); 35 | try { 36 | for (int pageNo = 1; ; pageNo++) { 37 | List synonymKeywordList = synonymKeywordService.find(pageNo, 10000); 38 | if (CollectionUtils.isEmpty(synonymKeywordList)) { 39 | break; 40 | } 41 | for (SynonymKeyword synonymKeyword : synonymKeywordList) { 42 | writer.write(synonymKeyword.getKeyword()); 43 | writer.write("=>"); 44 | writer.write(synonymKeyword.getSynonymKeyword()); 45 | writer.write("\n"); 46 | } 47 | } 48 | writer.flush(); 49 | Scanner scanner = new Scanner(new ByteArrayInputStream(outputStream.toByteArray())); 50 | while (scanner.hasNextLine()) { 51 | System.out.println(scanner.nextLine()); 52 | } 53 | scanner.close(); 54 | synonymParser.add(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(outputStream.toByteArray())))); 55 | } finally { 56 | IOUtils.close(writer); 57 | } 58 | 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/resources/applicationContext-service-generic.xml: -------------------------------------------------------------------------------- 1 | 2 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/dic/quantifier.dic: -------------------------------------------------------------------------------- 1 | 丈 2 | 下 3 | 世 4 | 世纪 5 | 两 6 | 个 7 | 中 8 | 串 9 | 亩 10 | 人 11 | 介 12 | 付 13 | 代 14 | 件 15 | 任 16 | 份 17 | 伏 18 | 伙 19 | 位 20 | 位数 21 | 例 22 | 倍 23 | 像素 24 | 元 25 | 克 26 | 克拉 27 | 公亩 28 | 公克 29 | 公分 30 | 公升 31 | 公尺 32 | 公担 33 | 公斤 34 | 公里 35 | 公顷 36 | 具 37 | 册 38 | 出 39 | 刀 40 | 分 41 | 分钟 42 | 分米 43 | 划 44 | 列 45 | 则 46 | 刻 47 | 剂 48 | 剑 49 | 副 50 | 加仑 51 | 勺 52 | 包 53 | 匙 54 | 匹 55 | 区 56 | 千克 57 | 千米 58 | 升 59 | 卷 60 | 厅 61 | 厘 62 | 厘米 63 | 双 64 | 发 65 | 口 66 | 句 67 | 只 68 | 台 69 | 叶 70 | 号 71 | 名 72 | 吨 73 | 听 74 | 员 75 | 周 76 | 周年 77 | 品 78 | 回 79 | 团 80 | 圆 81 | 圈 82 | 地 83 | 场 84 | 块 85 | 坪 86 | 堆 87 | 声 88 | 壶 89 | 处 90 | 夜 91 | 大 92 | 天 93 | 头 94 | 套 95 | 女 96 | 孔 97 | 字 98 | 宗 99 | 室 100 | 家 101 | 寸 102 | 对 103 | 封 104 | 尊 105 | 小时 106 | 尺 107 | 尾 108 | 局 109 | 层 110 | 届 111 | 岁 112 | 师 113 | 帧 114 | 幅 115 | 幕 116 | 幢 117 | 平方 118 | 平方公尺 119 | 平方公里 120 | 平方分米 121 | 平方厘米 122 | 平方码 123 | 平方米 124 | 平方英寸 125 | 平方英尺 126 | 平方英里 127 | 平米 128 | 年 129 | 年代 130 | 年级 131 | 度 132 | 座 133 | 式 134 | 引 135 | 张 136 | 成 137 | 战 138 | 截 139 | 户 140 | 房 141 | 所 142 | 扇 143 | 手 144 | 打 145 | 批 146 | 把 147 | 折 148 | 担 149 | 拍 150 | 招 151 | 拨 152 | 拳 153 | 指 154 | 掌 155 | 排 156 | 撮 157 | 支 158 | 文 159 | 斗 160 | 斤 161 | 方 162 | 族 163 | 日 164 | 时 165 | 曲 166 | 月 167 | 月份 168 | 期 169 | 本 170 | 朵 171 | 村 172 | 束 173 | 条 174 | 来 175 | 杯 176 | 枚 177 | 枝 178 | 枪 179 | 架 180 | 柄 181 | 柜 182 | 栋 183 | 栏 184 | 株 185 | 样 186 | 根 187 | 格 188 | 案 189 | 桌 190 | 档 191 | 桩 192 | 桶 193 | 梯 194 | 棵 195 | 楼 196 | 次 197 | 款 198 | 步 199 | 段 200 | 毛 201 | 毫 202 | 毫升 203 | 毫米 204 | 毫克 205 | 池 206 | 洲 207 | 派 208 | 海里 209 | 滴 210 | 炮 211 | 点 212 | 点钟 213 | 片 214 | 版 215 | 环 216 | 班 217 | 瓣 218 | 瓶 219 | 生 220 | 男 221 | 画 222 | 界 223 | 盆 224 | 盎司 225 | 盏 226 | 盒 227 | 盘 228 | 相 229 | 眼 230 | 石 231 | 码 232 | 碗 233 | 碟 234 | 磅 235 | 种 236 | 科 237 | 秒 238 | 秒钟 239 | 窝 240 | 立方公尺 241 | 立方分米 242 | 立方厘米 243 | 立方码 244 | 立方米 245 | 立方英寸 246 | 立方英尺 247 | 站 248 | 章 249 | 笔 250 | 等 251 | 筐 252 | 筒 253 | 箱 254 | 篇 255 | 篓 256 | 篮 257 | 簇 258 | 米 259 | 类 260 | 粒 261 | 级 262 | 组 263 | 维 264 | 缕 265 
| 缸 266 | 罐 267 | 网 268 | 群 269 | 股 270 | 脚 271 | 船 272 | 艇 273 | 艘 274 | 色 275 | 节 276 | 英亩 277 | 英寸 278 | 英尺 279 | 英里 280 | 行 281 | 袋 282 | 角 283 | 言 284 | 课 285 | 起 286 | 趟 287 | 路 288 | 车 289 | 转 290 | 轮 291 | 辆 292 | 辈 293 | 连 294 | 通 295 | 遍 296 | 部 297 | 里 298 | 重 299 | 针 300 | 钟 301 | 钱 302 | 锅 303 | 门 304 | 间 305 | 队 306 | 阶段 307 | 隅 308 | 集 309 | 页 310 | 顶 311 | 顷 312 | 项 313 | 顿 314 | 颗 315 | 餐 316 | 首 -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/resources/applicationContext-service-crypto.xml: -------------------------------------------------------------------------------- 1 | 2 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /xultimate-lucene/src/test/java/org/danielli/xultimate/lucene/TestUtils.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.lucene; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import org.apache.lucene.document.Document; 8 | import org.apache.lucene.document.Field.Store; 9 | import org.apache.lucene.document.LongField; 10 | import org.apache.lucene.document.StringField; 11 | import org.apache.lucene.document.TextField; 12 | import org.apache.lucene.search.Filter; 13 | import org.apache.lucene.search.IndexSearcher; 14 | import org.apache.lucene.search.Query; 15 | import org.apache.lucene.search.ScoreDoc; 16 | import org.apache.lucene.search.SearcherManager; 17 | import org.apache.lucene.search.Sort; 18 | import org.apache.lucene.search.TopDocs; 19 | 20 | public class TestUtils { 21 | 22 | private static String[] values = { "网站访问量搜索", "近实时搜索" }; 23 | 24 | public static Document[] getDocuments() { 25 | Document[] documents = new Document[100]; 26 | for (int i = 0; i < documents.length; i++) { 27 | documents[i] = createDocument((long) i); 28 | } 29 | return documents; 30 | } 31 | 32 | private static Document createDocument(Long id) { 33 | Document document = new Document(); 34 | document.add(new LongField("time", System.currentTimeMillis(), Store.YES)); 35 | document.add(new StringField("id", String.valueOf(id), Store.YES)); 36 | document.add(new StringField("noAnalyzer", values[(int) (id % 2)], Store.YES)); 37 | document.add(new TextField("analyzer", values[(int) (id % 2)], Store.YES)); 38 | return document; 39 | } 40 | 41 | private static void release(SearcherManager searcherManager, IndexSearcher indexSearcher) { 42 | if (indexSearcher != null) { 43 | try { 44 | searcherManager.release(indexSearcher); 45 | } catch (IOException e) { 46 | e.printStackTrace(); 47 | } 48 | } 49 | } 50 | 51 | public static List getDocuments(SearcherManager searcherManager, Query query, Sort sort) { 52 | IndexSearcher indexSearcher = null; 53 | try { 54 | indexSearcher = searcherManager.acquire(); 55 | TopDocs topDocs = indexSearcher.search(query, 200, sort); 56 | List documents = new ArrayList<>(); 57 | for (ScoreDoc scoreDoc : topDocs.scoreDocs) { 58 | documents.add(indexSearcher.doc(scoreDoc.doc)); 59 | } 60 | return documents; 61 | } catch (Exception e) { 62 | e.printStackTrace(); 63 | return null; 64 | } finally { 65 | release(searcherManager, indexSearcher); 66 | } 67 | } 68 | 69 | public static List getIdList(SearcherManager searcherManager, Query query, Filter filter) { 70 | IndexSearcher indexSearcher = null; 71 | try { 72 | indexSearcher = searcherManager.acquire(); 73 | IdCollector collector = 
new IdCollector(); 74 | indexSearcher.search(query, filter, collector); 75 | return collector.getResult(); 76 | } catch (Exception e) { 77 | e.printStackTrace(); 78 | return null; 79 | } finally { 80 | release(searcherManager, indexSearcher); 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/test/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0.1 3 | * IK Analyzer release 5.0.1 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | * 25 | */ 26 | package org.wltea.analyzer.sample; 27 | 28 | import java.io.IOException; 29 | import java.io.StringReader; 30 | 31 | import org.apache.lucene.analysis.Analyzer; 32 | import org.apache.lucene.analysis.TokenStream; 33 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 34 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; 35 | import org.apache.lucene.analysis.tokenattributes.TypeAttribute; 36 | import org.wltea.analyzer.lucene.IKAnalyzer; 37 | 38 | /** 39 | * 使用IKAnalyzer进行分词的演示 40 | * 2012-10-22 41 | * 42 | */ 43 | public class IKAnalzyerDemo { 44 | 45 | public static void main(String[] args){ 46 | //构建IK分词器,使用smart分词模式 47 | Analyzer analyzer = new IKAnalyzer(true); 48 | 49 | //获取Lucene的TokenStream对象 50 | TokenStream ts = null; 51 | try { 52 | ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子,你可以直接运行它!IKAnalyer can analysis english text too")); 53 | //获取词元位置属性 54 | OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); 55 | //获取词元文本属性 56 | CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); 57 | //获取词元文本属性 58 | TypeAttribute type = ts.addAttribute(TypeAttribute.class); 59 | 60 | 61 | //重置TokenStream(重置StringReader) 62 | ts.reset(); 63 | //迭代获取分词结果 64 | while (ts.incrementToken()) { 65 | System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); 66 | } 67 | //关闭TokenStream(关闭StringReader) 68 | ts.end(); // Perform end-of-stream operations, e.g. set the final offset. 
69 | 70 | } catch (IOException e) { 71 | e.printStackTrace(); 72 | } finally { 73 | //释放TokenStream的所有资源 74 | if(ts != null){ 75 | try { 76 | ts.close(); 77 | } catch (IOException e) { 78 | e.printStackTrace(); 79 | } 80 | } 81 | } 82 | 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/dic/Hit.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * IK 中文分词 版本 5.0 4 | * IK Analyzer release 5.0 5 | * 6 | * Licensed to the Apache Software Foundation (ASF) under one or more 7 | * contributor license agreements. See the NOTICE file distributed with 8 | * this work for additional information regarding copyright ownership. 9 | * The ASF licenses this file to You under the Apache License, Version 2.0 10 | * (the "License"); you may not use this file except in compliance with 11 | * the License. You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * 21 | * 源代码由林良益(linliangyi2005@gmail.com)提供 22 | * 版权声明 2012,乌龙茶工作室 23 | * provided by Linliangyi and copyright 2012 by Oolong studio 24 | * 25 | */ 26 | package org.wltea.analyzer.dic; 27 | 28 | /** 29 | * 表示一次词典匹配的命中 30 | */ 31 | public class Hit { 32 | //Hit不匹配 33 | private static final int UNMATCH = 0x00000000; 34 | //Hit完全匹配 35 | private static final int MATCH = 0x00000001; 36 | //Hit前缀匹配 37 | private static final int PREFIX = 0x00000010; 38 | 39 | 40 | //该HIT当前状态,默认未匹配 41 | private int hitState = UNMATCH; 42 | 43 | //记录词典匹配过程中,当前匹配到的词典分支节点 44 | private DictSegment matchedDictSegment; 45 | /* 46 | * 词段开始位置 47 | */ 48 | private int begin; 49 | /* 50 | * 词段的结束位置 51 | */ 52 | private int end; 53 | 54 | 55 | /** 56 | * 判断是否完全匹配 57 | */ 58 | public boolean isMatch() { 59 | return (this.hitState & MATCH) > 0; 60 | } 61 | /** 62 | * 63 | */ 64 | public void setMatch() { 65 | this.hitState = this.hitState | MATCH; 66 | } 67 | 68 | /** 69 | * 判断是否是词的前缀 70 | */ 71 | public boolean isPrefix() { 72 | return (this.hitState & PREFIX) > 0; 73 | } 74 | /** 75 | * 76 | */ 77 | public void setPrefix() { 78 | this.hitState = this.hitState | PREFIX; 79 | } 80 | /** 81 | * 判断是否是不匹配 82 | */ 83 | public boolean isUnmatch() { 84 | return this.hitState == UNMATCH ; 85 | } 86 | /** 87 | * 88 | */ 89 | public void setUnmatch() { 90 | this.hitState = UNMATCH; 91 | } 92 | 93 | public DictSegment getMatchedDictSegment() { 94 | return matchedDictSegment; 95 | } 96 | 97 | public void setMatchedDictSegment(DictSegment matchedDictSegment) { 98 | this.matchedDictSegment = matchedDictSegment; 99 | } 100 | 101 | public int getBegin() { 102 | return begin; 103 | } 104 | 105 | public void setBegin(int begin) { 106 | this.begin = begin; 107 | } 108 | 109 | public int getEnd() { 110 | return end; 111 | } 112 | 113 | public void setEnd(int end) { 114 | this.end = end; 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /xultimate-lucene/src/test/java/org/danielli/xultimate/lucene/IdFieldComparator.java: -------------------------------------------------------------------------------- 1 | 
package org.danielli.xultimate.lucene; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.lang3.ArrayUtils; 6 | import org.apache.lucene.index.AtomicReaderContext; 7 | import org.apache.lucene.search.FieldCache; 8 | import org.apache.lucene.search.FieldComparator; 9 | 10 | public class IdFieldComparator extends FieldComparator<Long> { 11 | private final long[] values; 12 | protected final String field; 13 | private FieldCache.Longs currentReaderValues; 14 | private long bottom; 15 | 16 | private long[] headIds; 17 | 18 | public IdFieldComparator(int numHits, String field, long[] headIds) { 19 | this.field = field; 20 | values = new long[numHits]; 21 | this.headIds = headIds; 22 | } 23 | 24 | public int customCompare(long v1, long v2) { 25 | if (v1 == v2) { 26 | return 0; 27 | } 28 | 29 | boolean v1Exists = false; 30 | if (ArrayUtils.contains(headIds, v1)) { 31 | v1Exists = true; 32 | } 33 | boolean v2Exists = false; 34 | if (ArrayUtils.contains(headIds, v2)) { 35 | v2Exists = true; 36 | } 37 | 38 | if (v1Exists && v2Exists) { 39 | return v1 > v2 ? -1 : 1; 40 | } else if (v1Exists) { 41 | return -1; 42 | } else if (v2Exists) { 43 | return 1; 44 | } else { 45 | return v1 > v2 ? -1 : 1; 46 | } 47 | } 48 | 49 | @Override 50 | public int compare(int slot1, int slot2) { 51 | final long v1 = values[slot1]; 52 | final long v2 = values[slot2]; 53 | // if (v1 > v2) { 54 | // return 1; 55 | // } else if (v1 < v2) { 56 | // return -1; 57 | // } else { 58 | // return 0; 59 | // } 60 | return customCompare(v1, v2); 61 | } 62 | 63 | @Override 64 | public void setBottom(int slot) { 65 | this.bottom = values[slot]; 66 | } 67 | 68 | @Override 69 | public int compareBottom(int doc) throws IOException { 70 | long v2 = currentReaderValues.get(doc); 71 | // if (bottom > v2) { 72 | // return 1; 73 | // } else if (bottom < v2) { 74 | // return -1; 75 | // } else { 76 | // return 0; 77 | // } 78 | return customCompare(bottom, v2); 79 | } 80 | 81 | @Override 82 | public void copy(int slot, int doc) throws IOException { 83 | long v2 = currentReaderValues.get(doc); 84 | values[slot] = v2; 85 | } 86 | 87 | @Override 88 | public FieldComparator<Long> setNextReader(AtomicReaderContext context) throws IOException { 89 | currentReaderValues = FieldCache.DEFAULT.getLongs(context.reader(), field, false); 90 | return this; 91 | } 92 | 93 | @Override 94 | public Long value(int slot) { 95 | return Long.valueOf(values[slot]); 96 | } 97 | 98 | @Override 99 | public int compareDocToValue(int doc, Long value) throws IOException { 100 | final long valueLong = value.longValue(); 101 | long docValue = currentReaderValues.get(doc); 102 | // if (docValue < valueLong) { 103 | // return -1; 104 | // } else if (docValue > valueLong) { 105 | // return 1; 106 | // } else { 107 | // return 0; 108 | // } 109 | return customCompare(docValue, valueLong); 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /xultimate-lucene/src/test/resources/org/danielli/xultimate/lucene/applicationContext-service-lucene1.xml: -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/CharacterUtil.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0 3 | * IK Analyzer release 5.0 4 |
5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | * 字符集识别工具类 25 | */ 26 | package org.wltea.analyzer.core; 27 | 28 | /** 29 | * 30 | * 字符集识别工具类 31 | */ 32 | class CharacterUtil { 33 | 34 | public static final int CHAR_USELESS = 0; 35 | 36 | public static final int CHAR_ARABIC = 0X00000001; 37 | 38 | public static final int CHAR_ENGLISH = 0X00000002; 39 | 40 | public static final int CHAR_CHINESE = 0X00000004; 41 | 42 | public static final int CHAR_OTHER_CJK = 0X00000008; 43 | 44 | 45 | /** 46 | * 识别字符类型 47 | * @param input 48 | * @return int CharacterUtil定义的字符类型常量 49 | */ 50 | static int identifyCharType(char input){ 51 | if(input >= '0' && input <= '9'){ 52 | return CHAR_ARABIC; 53 | 54 | }else if((input >= 'a' && input <= 'z') 55 | || (input >= 'A' && input <= 'Z')){ 56 | return CHAR_ENGLISH; 57 | 58 | }else { 59 | Character.UnicodeBlock ub = Character.UnicodeBlock.of(input); 60 | 61 | if(ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 62 | || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS 63 | || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A){ 64 | //目前已知的中文字符UTF-8集合 65 | return CHAR_CHINESE; 66 | 67 | }else if(ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS //全角数字字符和日韩字符 68 | //韩文字符集 69 | || ub == Character.UnicodeBlock.HANGUL_SYLLABLES 70 | || ub == Character.UnicodeBlock.HANGUL_JAMO 71 | || ub == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO 72 | //日文字符集 73 | || ub == Character.UnicodeBlock.HIRAGANA //平假名 74 | || ub == Character.UnicodeBlock.KATAKANA //片假名 75 | || ub == Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS){ 76 | return CHAR_OTHER_CJK; 77 | 78 | } 79 | } 80 | //其他的不做处理的字符 81 | return CHAR_USELESS; 82 | } 83 | 84 | /** 85 | * 进行字符规格化(全角转半角,大写转小写处理) 86 | * @param input 87 | * @return char 88 | */ 89 | static char regularize(char input){ 90 | if (input == 12288) { 91 | input = (char) 32; 92 | 93 | }else if (input > 65280 && input < 65375) { 94 | input = (char) (input - 65248); 95 | 96 | }else if (input >= 'A' && input <= 'Z') { 97 | input += 32; 98 | } 99 | 100 | return input; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /xultimate-lucene/src/test/resources/org/danielli/xultimate/lucene/applicationContext-service-lucene2.xml: --------------------------------------------------------------------------------
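A quick illustration of what CharacterUtil does for the segmenters may help here. The sketch below is not part of the original sources; it assumes a hypothetical demo class placed in the org.wltea.analyzer.core package, since CharacterUtil and its methods are package-private:

```java
package org.wltea.analyzer.core;

// Hypothetical demo class, not in the original repo.
public class CharacterUtilSketch {
    public static void main(String[] args) {
        // Type identification: digits, Latin letters and CJK ideographs map to distinct constants.
        System.out.println(CharacterUtil.identifyCharType('9') == CharacterUtil.CHAR_ARABIC);   // true
        System.out.println(CharacterUtil.identifyCharType('k') == CharacterUtil.CHAR_ENGLISH);  // true
        System.out.println(CharacterUtil.identifyCharType('词') == CharacterUtil.CHAR_CHINESE); // true
        // Regularization: full-width space becomes a half-width space, upper case becomes lower case.
        System.out.println(CharacterUtil.regularize((char) 12288) == ' '); // true
        System.out.println(CharacterUtil.regularize('A'));                 // prints: a
        // Note the if/else-if chain: a full-width 'A' (U+FF21) is narrowed but NOT lower-cased.
        System.out.println(CharacterUtil.regularize((char) 65313));        // prints: A
    }
}
```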
/xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0.1 3 | * IK Analyzer release 5.0.1 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | 25 | * 26 | */ 27 | package org.wltea.analyzer.lucene; 28 | 29 | import java.io.IOException; 30 | import java.io.Reader; 31 | 32 | import org.apache.lucene.analysis.Tokenizer; 33 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 34 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; 35 | import org.apache.lucene.analysis.tokenattributes.TypeAttribute; 36 | 37 | import org.wltea.analyzer.core.IKSegmenter; 38 | import org.wltea.analyzer.core.Lexeme; 39 | 40 | /** 41 | * IK分词器 Lucene Tokenizer适配器类 42 | * 兼容Lucene 4.0版本 43 | */ 44 | public final class IKTokenizer extends Tokenizer { 45 | 46 | //IK分词器实现 47 | private IKSegmenter _IKImplement; 48 | 49 | //词元文本属性 50 | private final CharTermAttribute termAtt; 51 | //词元位移属性 52 | private final OffsetAttribute offsetAtt; 53 | //词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量) 54 | private final TypeAttribute typeAtt; 55 | //记录最后一个词元的结束位置 56 | private int endPosition; 57 | 58 | /** 59 | * Lucene 4.0 Tokenizer适配器类构造函数 60 | * @param in 61 | * @param useSmart 62 | */ 63 | public IKTokenizer(Reader in , boolean useSmart){ 64 | super(in); 65 | offsetAtt = addAttribute(OffsetAttribute.class); 66 | termAtt = addAttribute(CharTermAttribute.class); 67 | typeAtt = addAttribute(TypeAttribute.class); 68 | _IKImplement = new IKSegmenter(input , useSmart); 69 | } 70 | 71 | /* (non-Javadoc) 72 | * @see org.apache.lucene.analysis.TokenStream#incrementToken() 73 | */ 74 | @Override 75 | public boolean incrementToken() throws IOException { 76 | //清除所有的词元属性 77 | clearAttributes(); 78 | Lexeme nextLexeme = _IKImplement.next(); 79 | if(nextLexeme != null){ 80 | //将Lexeme转成Attributes 81 | //设置词元文本 82 | termAtt.append(nextLexeme.getLexemeText()); 83 | //设置词元长度 84 | termAtt.setLength(nextLexeme.getLength()); 85 | //设置词元位移 86 | offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition()); 87 | //记录分词的最后位置 88 | endPosition = nextLexeme.getEndPosition(); 89 | //记录词元分类 90 | typeAtt.setType(nextLexeme.getLexemeTypeString()); 91 | //返回true告知还有下个词元 92 | return true; 93 | } 94 | //返回false告知词元输出完毕 95 | return false; 96 | } 97 | 98 | /* 99 | * (non-Javadoc) 100 | * @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader) 101 | */ 102 | @Override 103 | public void reset() throws IOException { 104 | super.reset(); 105 |
_IKImplement.reset(input); 106 | } 107 | 108 | @Override 109 | public final void end() { 110 | // set final offset 111 | int finalOffset = correctOffset(this.endPosition); 112 | offsetAtt.setOffset(finalOffset, finalOffset); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /xultimate-lucene/src/test/resources/org/danielli/xultimate/lucene/applicationContext-service-lucene3.xml: -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * IK 中文分词 版本 5.0 4 | * IK Analyzer release 5.0 5 | * 6 | * Licensed to the Apache Software Foundation (ASF) under one or more 7 | * contributor license agreements. See the NOTICE file distributed with 8 | * this work for additional information regarding copyright ownership. 9 | * The ASF licenses this file to You under the Apache License, Version 2.0 10 | * (the "License"); you may not use this file except in compliance with 11 | * the License. You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * 21 | * 源代码由林良益(linliangyi2005@gmail.com)提供 22 | * 版权声明 2012,乌龙茶工作室 23 | * provided by Linliangyi and copyright 2012 by Oolong studio 24 | * 25 | */ 26 | package org.wltea.analyzer.core; 27 | 28 | import java.util.LinkedList; 29 | import java.util.List; 30 | 31 | import org.wltea.analyzer.dic.Dictionary; 32 | import org.wltea.analyzer.dic.Hit; 33 | 34 | 35 | /** 36 | * 中文-日韩文子分词器 37 | */ 38 | class CJKSegmenter implements ISegmenter { 39 | 40 | //子分词器标签 41 | static final String SEGMENTER_NAME = "CJK_SEGMENTER"; 42 | //待处理的分词hit队列 43 | private List<Hit> tmpHits; 44 | 45 | 46 | CJKSegmenter(){ 47 | this.tmpHits = new LinkedList<Hit>(); 48 | } 49 | 50 | /* (non-Javadoc) 51 | * @see org.wltea.analyzer.core.ISegmenter#analyze(org.wltea.analyzer.core.AnalyzeContext) 52 | */ 53 | public void analyze(AnalyzeContext context) { 54 | if(CharacterUtil.CHAR_USELESS != context.getCurrentCharType()){ 55 | 56 | //优先处理tmpHits中的hit 57 | if(!this.tmpHits.isEmpty()){ 58 | //处理词段队列 59 | Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]); 60 | for(Hit hit : tmpArray){ 61 | hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit); 62 | if(hit.isMatch()){ 63 | //输出当前的词 64 | Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD); 65 | context.addLexeme(newLexeme); 66 | 67 | if(!hit.isPrefix()){//不是词前缀,hit不需要继续匹配,移除 68 | this.tmpHits.remove(hit); 69 | } 70 | 71 | }else if(hit.isUnmatch()){ 72 | //hit不是词,移除 73 | this.tmpHits.remove(hit); 74 | } 75 | } 76 | } 77 | 78 | //********************************* 79 | //再对当前指针位置的字符进行单字匹配 80 | Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1); 81
| if(singleCharHit.isMatch()){//首字成词 82 | //输出当前的词 83 | Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD); 84 | context.addLexeme(newLexeme); 85 | 86 | //同时也是词前缀 87 | if(singleCharHit.isPrefix()){ 88 | //前缀匹配则放入hit列表 89 | this.tmpHits.add(singleCharHit); 90 | } 91 | }else if(singleCharHit.isPrefix()){//首字为词前缀 92 | //前缀匹配则放入hit列表 93 | this.tmpHits.add(singleCharHit); 94 | } 95 | 96 | 97 | }else{ 98 | //遇到CHAR_USELESS字符 99 | //清空队列 100 | this.tmpHits.clear(); 101 | } 102 | 103 | //判断缓冲区是否已经读完 104 | if(context.isBufferConsumed()){ 105 | //清空队列 106 | this.tmpHits.clear(); 107 | } 108 | 109 | //判断是否锁定缓冲区 110 | if(this.tmpHits.size() == 0){ 111 | context.unlockBuffer(SEGMENTER_NAME); 112 | 113 | }else{ 114 | context.lockBuffer(SEGMENTER_NAME); 115 | } 116 | } 117 | 118 | /* (non-Javadoc) 119 | * @see org.wltea.analyzer.core.ISegmenter#reset() 120 | */ 121 | public void reset() { 122 | //清空队列 123 | this.tmpHits.clear(); 124 | } 125 | 126 | } 127 | -------------------------------------------------------------------------------- /xultimate-lucene/src/test/java/org/danielli/xultimate/lucene/NearRealtimeSearchTest2.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.lucene; 2 | 3 | import java.io.IOException; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.apache.lucene.document.Document; 8 | import org.apache.lucene.index.Term; 9 | import org.apache.lucene.index.TrackingIndexWriter; 10 | import org.apache.lucene.queryparser.classic.ParseException; 11 | import org.apache.lucene.queryparser.classic.QueryParser; 12 | import org.apache.lucene.search.BooleanQuery; 13 | import org.apache.lucene.search.FieldComparator; 14 | import org.apache.lucene.search.FieldComparatorSource; 15 | import org.apache.lucene.search.QueryWrapperFilter; 16 | import org.apache.lucene.search.SearcherManager; 17 | import org.apache.lucene.search.Sort; 18 | import org.apache.lucene.search.SortField; 19 | import org.apache.lucene.search.TermQuery; 20 | import org.apache.lucene.search.BooleanClause.Occur; 21 | import org.apache.lucene.search.SortField.Type; 22 | import org.apache.lucene.util.Version; 23 | import org.junit.Before; 24 | import org.junit.Test; 25 | import org.junit.runner.RunWith; 26 | import org.springframework.test.context.ContextConfiguration; 27 | import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; 28 | import org.wltea.analyzer.lucene.IKAnalyzer; 29 | 30 | @RunWith(SpringJUnit4ClassRunner.class) 31 | @ContextConfiguration(locations = { "classpath*:/org/danielli/xultimate/lucene/applicationContext-service-lucene2.xml"}) 32 | public class NearRealtimeSearchTest2 { 33 | 34 | @Resource 35 | private TrackingIndexWriter trackingIndexWriter; 36 | 37 | @Resource 38 | private SearcherManager searcherManager; 39 | 40 | @Resource 41 | private IKAnalyzer ikAnalyzer; 42 | 43 | public void saveDocument(Document document) { 44 | try { 45 | trackingIndexWriter.addDocument(document); 46 | searcherManager.maybeRefreshBlocking(); 47 | } catch (Exception e) { 48 | e.printStackTrace(); 49 | } 50 | } 51 | 52 | public void deleteDocumentById(Long id) { 53 | try { 54 | trackingIndexWriter.deleteDocuments(new Term("id", String.valueOf(id))); 55 | searcherManager.maybeRefreshBlocking(); 56 | } catch (Exception e) { 57 | e.printStackTrace(); 58 | } 59 | } 60 | 61 | @Before 62 | public void before() { 63 | for (Document document : TestUtils.getDocuments()) { 64 | saveDocument(document); 
65 | } 66 | } 67 | 68 | @Test 69 | public void test() throws ParseException { 70 | /* Search */ 71 | // 未分词 + 排序 72 | BooleanQuery noAnalyzerBooleanQuery = new BooleanQuery(); 73 | noAnalyzerBooleanQuery.add(new TermQuery(new Term("noAnalyzer", "近实时搜索")), Occur.MUST); 74 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, Sort.RELEVANCE).size()); 75 | // 删除 76 | deleteDocumentById(1L); 77 | // 未分词 + 排序 78 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, Sort.RELEVANCE).size()); 79 | // 分词 + 排序 80 | BooleanQuery analyzerBooleanQuery = new BooleanQuery(); 81 | analyzerBooleanQuery.add(new QueryParser(Version.LUCENE_45, "analyzer", ikAnalyzer).parse("近实时搜索"), Occur.MUST); 82 | System.out.println(TestUtils.getDocuments(searcherManager, analyzerBooleanQuery, Sort.RELEVANCE).size()); 83 | /* Filter + Collector */ 84 | // 自定义Filter + 自定义Collector 85 | System.out.println(TestUtils.getIdList(searcherManager, noAnalyzerBooleanQuery, new IdFilter(new Long[] { 3L, 5L })).size()); 86 | // Filter + 自定义Collector 87 | BooleanQuery booleanQuery = new BooleanQuery(); 88 | booleanQuery.add(new TermQuery(new Term("id", String.valueOf(3L))), Occur.SHOULD); 89 | booleanQuery.add(new TermQuery(new Term("id", String.valueOf(5L))), Occur.SHOULD); 90 | QueryWrapperFilter filter = new QueryWrapperFilter(booleanQuery); 91 | System.out.println(TestUtils.getIdList(searcherManager, noAnalyzerBooleanQuery, filter).size()); 92 | /* 排序 */ 93 | // ID倒排 94 | Sort idSort = new Sort(new SortField("id", Type.LONG, true)); 95 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(0).get("id")); 96 | // 自定义排序 97 | idSort = new Sort(new SortField("id", new FieldComparatorSource() { 98 | 99 | @Override 100 | public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { 101 | return new IdFieldComparator(numHits, fieldname, new long[] { 59L, 57L }); 102 | } 103 | })); 104 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(0).get("id")); 105 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(1).get("id")); 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /xultimate-lucene/src/test/java/org/danielli/xultimate/lucene/NearRealtimeSearchTest1.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.lucene; 2 | 3 | import java.io.IOException; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.apache.lucene.document.Document; 8 | import org.apache.lucene.index.IndexWriter; 9 | import org.apache.lucene.index.Term; 10 | import org.apache.lucene.queryparser.classic.ParseException; 11 | import org.apache.lucene.queryparser.classic.QueryParser; 12 | import org.apache.lucene.search.BooleanQuery; 13 | import org.apache.lucene.search.FieldComparator; 14 | import org.apache.lucene.search.FieldComparatorSource; 15 | import org.apache.lucene.search.QueryWrapperFilter; 16 | import org.apache.lucene.search.SearcherManager; 17 | import org.apache.lucene.search.Sort; 18 | import org.apache.lucene.search.SortField; 19 | import org.apache.lucene.search.SortField.Type; 20 | import org.apache.lucene.search.TermQuery; 21 | import org.apache.lucene.search.BooleanClause.Occur; 22 | import org.apache.lucene.util.Version; 23 | import org.junit.Before; 24 | import org.junit.Test; 
25 | import org.junit.runner.RunWith; 26 | import org.springframework.test.context.ContextConfiguration; 27 | import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; 28 | import org.wltea.analyzer.lucene.IKAnalyzer; 29 | 30 | @RunWith(SpringJUnit4ClassRunner.class) 31 | @ContextConfiguration(locations = { "classpath:/org/danielli/xultimate/lucene/applicationContext-service-lucene1.xml"}) 32 | public class NearRealtimeSearchTest1 { 33 | 34 | @Resource 35 | private IndexWriter indexWriter; 36 | 37 | @Resource 38 | private SearcherManager searcherManager; 39 | 40 | @Resource 41 | private IKAnalyzer ikAnalyzer; 42 | 43 | public void saveDocument(Document document) { 44 | try { 45 | indexWriter.addDocument(document); 46 | indexWriter.commit(); 47 | searcherManager.maybeRefreshBlocking(); 48 | } catch (Exception e) { 49 | e.printStackTrace(); 50 | } 51 | } 52 | 53 | public void deleteDocumentById(Long id) { 54 | try { 55 | indexWriter.deleteDocuments(new Term("id", String.valueOf(id))); 56 | indexWriter.commit(); 57 | searcherManager.maybeRefreshBlocking(); 58 | } catch (Exception e) { 59 | e.printStackTrace(); 60 | } 61 | } 62 | 63 | @Before 64 | public void before() { 65 | for (Document document : TestUtils.getDocuments()) { 66 | saveDocument(document); 67 | } 68 | } 69 | 70 | @Test 71 | public void test() throws ParseException { 72 | /* Search */ 73 | // 未分词 + 排序 74 | BooleanQuery noAnalyzerBooleanQuery = new BooleanQuery(); 75 | noAnalyzerBooleanQuery.add(new TermQuery(new Term("noAnalyzer", "近实时搜索")), Occur.MUST); 76 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, Sort.RELEVANCE).size()); 77 | // 删除 78 | deleteDocumentById(1L); 79 | // 未分词 + 排序 80 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, Sort.RELEVANCE).size()); 81 | // 分词 + 排序 82 | BooleanQuery analyzerBooleanQuery = new BooleanQuery(); 83 | analyzerBooleanQuery.add(new QueryParser(Version.LUCENE_45, "analyzer", ikAnalyzer).parse("近实时搜索"), Occur.MUST); 84 | System.out.println(TestUtils.getDocuments(searcherManager, analyzerBooleanQuery, Sort.RELEVANCE).size()); 85 | /* Filter + Collector */ 86 | // 自定义Filter + 自定义Collector 87 | System.out.println(TestUtils.getIdList(searcherManager, noAnalyzerBooleanQuery, new IdFilter(new Long[] { 3L, 5L })).size()); 88 | // Filter + 自定义Collector 89 | BooleanQuery booleanQuery = new BooleanQuery(); 90 | booleanQuery.add(new TermQuery(new Term("id", String.valueOf(3L))), Occur.SHOULD); 91 | booleanQuery.add(new TermQuery(new Term("id", String.valueOf(5L))), Occur.SHOULD); 92 | QueryWrapperFilter filter = new QueryWrapperFilter(booleanQuery); 93 | System.out.println(TestUtils.getIdList(searcherManager, noAnalyzerBooleanQuery, filter).size()); 94 | /* 排序 */ 95 | // ID倒排 96 | Sort idSort = new Sort(new SortField("id", Type.LONG, true)); 97 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(0).get("id")); 98 | // 自定义排序 99 | idSort = new Sort(new SortField("id", new FieldComparatorSource() { 100 | 101 | @Override 102 | public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { 103 | return new IdFieldComparator(numHits, fieldname, new long[] { 59L, 57L }); 104 | } 105 | })); 106 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(0).get("id")); 107 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(1).get("id")); 108 | } 
109 | } 110 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/danielli/xultimate/searching/SynonymFilterFactory.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.searching; 2 | 3 | import java.io.IOException; 4 | import java.io.Reader; 5 | import java.text.ParseException; 6 | import java.util.HashMap; 7 | import java.util.Iterator; 8 | import java.util.Map; 9 | 10 | import org.apache.lucene.analysis.Analyzer; 11 | import org.apache.lucene.analysis.TokenStream; 12 | import org.apache.lucene.analysis.Tokenizer; 13 | import org.apache.lucene.analysis.core.LowerCaseFilter; 14 | import org.apache.lucene.analysis.core.WhitespaceTokenizer; 15 | import org.apache.lucene.analysis.synonym.SolrSynonymParser; 16 | import org.apache.lucene.analysis.synonym.SynonymFilter; 17 | import org.apache.lucene.analysis.synonym.SynonymMap; 18 | import org.apache.lucene.analysis.util.ResourceLoader; 19 | import org.apache.lucene.analysis.util.ResourceLoaderAware; 20 | import org.apache.lucene.analysis.util.TokenFilterFactory; 21 | import org.apache.lucene.analysis.util.TokenizerFactory; 22 | import org.apache.lucene.util.Version; 23 | 24 | public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { 25 | 26 | private final boolean ignoreCase; 27 | private final String tokenizerFactory; 28 | private final String format; 29 | private final boolean expand; 30 | private final Map<String, String> tokArgs = new HashMap<String, String>(); 31 | 32 | private SynonymMap map; 33 | 34 | public SynonymFilterFactory(Map<String, String> args) { 35 | super(args); 36 | ignoreCase = getBoolean(args, "ignoreCase", false); 37 | format = get(args, "format"); 38 | expand = getBoolean(args, "expand", true); 39 | 40 | tokenizerFactory = get(args, "tokenizerFactory"); 41 | if (tokenizerFactory != null) { 42 | assureMatchVersion(); 43 | tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString()); 44 | for (Iterator<String> itr = args.keySet().iterator(); itr.hasNext();) { 45 | String key = itr.next(); 46 | tokArgs.put(key.replaceAll("^tokenizerFactory\\.", ""), args.get(key)); 47 | itr.remove(); 48 | } 49 | } 50 | if (!args.isEmpty()) { 51 | throw new IllegalArgumentException("Unknown parameters: " + args); 52 | } 53 | } 54 | 55 | @SuppressWarnings("resource") 56 | @Override 57 | public TokenStream create(TokenStream input) { 58 | // if the fst is null, it means there's actually no synonyms... just return the original stream 59 | // as there is nothing to do here. 60 | return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase); 61 | } 62 | 63 | @SuppressWarnings("resource") 64 | @Override 65 | public void inform(ResourceLoader loader) throws IOException { 66 | final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); 67 | 68 | Analyzer analyzer = new Analyzer() { 69 | @Override 70 | protected TokenStreamComponents createComponents(String fieldName, Reader reader) { 71 | Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_45, reader) : factory.create(reader); 72 | TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_45, tokenizer) : tokenizer; 73 | return new TokenStreamComponents(tokenizer, stream); 74 | } 75 | }; 76 | 77 | try { 78 | if (format == null || format.equals("solr")) { 79 | // TODO: expose dedup as a parameter?
80 | map = loadSolrSynonyms(loader, true, analyzer); 81 | } else { 82 | // TODO: somehow make this more pluggable 83 | throw new IllegalArgumentException("Unrecognized synonyms format: " + format); 84 | } 85 | } catch (ParseException e) { 86 | throw new IOException("Error parsing synonyms file:", e); 87 | } 88 | } 89 | 90 | /** 91 | * Load synonyms from the solr format, "format=solr". 92 | */ 93 | private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException { 94 | SolrSynonymParser parser = new SolrSynonymParser(dedup, expand, analyzer); 95 | 96 | SolrSynonymDtabaseLoader solrSynonymDtabaseLoader = new SolrSynonymDtabaseLoader(); 97 | solrSynonymDtabaseLoader.handle(parser); 98 | 99 | return parser.build(); 100 | } 101 | 102 | // (there are no tests for this functionality) 103 | private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException { 104 | Class<? extends TokenizerFactory> clazz = loader.findClass(cname, TokenizerFactory.class); 105 | try { 106 | TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(tokArgs); 107 | if (tokFactory instanceof ResourceLoaderAware) { 108 | ((ResourceLoaderAware) tokFactory).inform(loader); 109 | } 110 | return tokFactory; 111 | } catch (Exception e) { 112 | throw new RuntimeException(e); 113 | } 114 | } 115 | 116 | } 117 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/IKArbitrator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0 3 | * IK Analyzer release 5.0 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License.
19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | */ 25 | package org.wltea.analyzer.core; 26 | 27 | import java.util.Stack; 28 | import java.util.TreeSet; 29 | 30 | /** 31 | * IK分词歧义裁决器 32 | */ 33 | class IKArbitrator { 34 | 35 | IKArbitrator(){ 36 | 37 | } 38 | 39 | /** 40 | * 分词歧义处理 41 | * @param context 42 | * @param useSmart 43 | */ 44 | void process(AnalyzeContext context , boolean useSmart){ 45 | QuickSortSet orgLexemes = context.getOrgLexemes(); 46 | Lexeme orgLexeme = orgLexemes.pollFirst(); 47 | 48 | LexemePath crossPath = new LexemePath(); 49 | while(orgLexeme != null){ 50 | if(!crossPath.addCrossLexeme(orgLexeme)){ 51 | //找到与crossPath不相交的下一个crossPath 52 | if(crossPath.size() == 1 || !useSmart){ 53 | //crossPath没有歧义 或者 不做歧义处理 54 | //直接输出当前crossPath 55 | context.addLexemePath(crossPath); 56 | }else{ 57 | //对当前的crossPath进行歧义处理 58 | QuickSortSet.Cell headCell = crossPath.getHead(); 59 | LexemePath judgeResult = this.judge(headCell, crossPath.getPathLength()); 60 | //输出歧义处理结果judgeResult 61 | context.addLexemePath(judgeResult); 62 | } 63 | 64 | //把orgLexeme加入新的crossPath中 65 | crossPath = new LexemePath(); 66 | crossPath.addCrossLexeme(orgLexeme); 67 | } 68 | orgLexeme = orgLexemes.pollFirst(); 69 | } 70 | 71 | 72 | //处理最后的path 73 | if(crossPath.size() == 1 || !useSmart){ 74 | //crossPath没有歧义 或者 不做歧义处理 75 | //直接输出当前crossPath 76 | context.addLexemePath(crossPath); 77 | }else{ 78 | //对当前的crossPath进行歧义处理 79 | QuickSortSet.Cell headCell = crossPath.getHead(); 80 | LexemePath judgeResult = this.judge(headCell, crossPath.getPathLength()); 81 | //输出歧义处理结果judgeResult 82 | context.addLexemePath(judgeResult); 83 | } 84 | } 85 | 86 | /** 87 | * 歧义识别 88 | * @param lexemeCell 歧义路径链表头 89 | * @param fullTextLength 歧义路径文本长度 90 | * @return 候选路径集合中的最优方案 91 | */ 92 | private LexemePath judge(QuickSortSet.Cell lexemeCell , int fullTextLength){ 93 | 94 | //候选路径集合 95 | TreeSet<LexemePath> pathOptions = new TreeSet<LexemePath>(); 96 | //候选结果路径 97 | LexemePath option = new LexemePath(); 98 | 99 | //对crossPath进行一次遍历,同时返回本次遍历中有冲突的Lexeme栈 100 | Stack<QuickSortSet.Cell> lexemeStack = this.forwardPath(lexemeCell , option); 101 | 102 | //当前词元链并非最理想的,加入候选路径集合 103 | pathOptions.add(option.copy()); 104 | 105 | //存在歧义词,处理 106 | QuickSortSet.Cell c = null; 107 | while(!lexemeStack.isEmpty()){ 108 | c = lexemeStack.pop(); 109 | //回滚词元链 110 | this.backPath(c.getLexeme() , option); 111 | //从歧义词位置开始,递归,生成可选方案 112 | this.forwardPath(c , option); 113 | pathOptions.add(option.copy()); 114 | } 115 | 116 | //返回集合中的最优方案 117 | return pathOptions.first(); 118 | 119 | } 120 | 121 | /** 122 | * 向前遍历,添加词元,构造一个无歧义词元组合 123 | * @param lexemeCell 124 | * @param option 125 | */ 126 | private Stack<QuickSortSet.Cell> forwardPath(QuickSortSet.Cell lexemeCell , LexemePath option){ 127 | //发生冲突的Lexeme栈 128 | Stack<QuickSortSet.Cell> conflictStack = new Stack<QuickSortSet.Cell>(); 129 | QuickSortSet.Cell c = lexemeCell; 130 | //迭代遍历Lexeme链表 131 | while(c != null && c.getLexeme() != null){ 132 | if(!option.addNotCrossLexeme(c.getLexeme())){ 133 | //词元交叉,添加失败则加入lexemeStack栈 134 | conflictStack.push(c); 135 | } 136 | c = c.getNext(); 137 | } 138 | return conflictStack; 139 | } 140 | 141 | /** 142 | * 回滚词元链,直到它能够接受指定的词元 143 | * @param l 144 | * @param option 145 | */ 146 | private void backPath(Lexeme l , LexemePath option){ 147 | while(option.checkCross(l)){ 148 | option.removeTail(); 149 | } 150 | 151 | } 152 | 153 | } 154 | --------------------------------------------------------------------------------
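IKArbitrator is only exercised when useSmart is enabled; with useSmart=false every crossing lexeme is emitted as-is. A minimal sketch of the observable difference, driven through IKSegmenter, which is the public entry point that invokes the arbitrator (hypothetical demo class; the sample string is arbitrary):

```java
package org.wltea.analyzer.sample;

import java.io.IOException;
import java.io.StringReader;

import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

// Hypothetical demo class, not in the original repo.
public class ArbitratorSketch {
    public static void main(String[] args) throws IOException {
        for (boolean useSmart : new boolean[] { false, true }) {
            // useSmart=false: fine-grained output, overlapping lexemes are all emitted.
            // useSmart=true: IKArbitrator.process(...) keeps one non-crossing path per ambiguity.
            IKSegmenter seg = new IKSegmenter(new StringReader("中华人民共和国"), useSmart);
            StringBuilder out = new StringBuilder("useSmart=" + useSmart + ":");
            for (Lexeme l = seg.next(); l != null; l = seg.next()) {
                out.append(' ').append(l.getLexemeText());
            }
            System.out.println(out);
        }
    }
}
```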
/xultimate-ikanalyzer/src/main/resources/applicationContext-dao-base.xml: -------------------------------------------------------------------------------- /xultimate-lucene/src/test/java/org/danielli/xultimate/lucene/NearRealtimeSearchTest3.java: -------------------------------------------------------------------------------- 1 | package org.danielli.xultimate.lucene; 2 | 3 | import java.io.IOException; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.apache.lucene.document.Document; 8 | import org.apache.lucene.index.Term; 9 | import org.apache.lucene.index.TrackingIndexWriter; 10 | import org.apache.lucene.queryparser.classic.ParseException; 11 | import org.apache.lucene.queryparser.classic.QueryParser; 12 | import org.apache.lucene.search.BooleanClause.Occur; 13 | import org.apache.lucene.search.BooleanQuery; 14 | import org.apache.lucene.search.FieldComparator; 15 | import org.apache.lucene.search.FieldComparatorSource; 16 | import org.apache.lucene.search.QueryWrapperFilter; 17 | import org.apache.lucene.search.SearcherManager; 18 | import org.apache.lucene.search.Sort; 19 | import org.apache.lucene.search.SortField; 20 | import org.apache.lucene.search.SortField.Type; 21 | import org.apache.lucene.search.TermQuery; 22 | import org.apache.lucene.util.Version; 23 | import org.junit.Before; 24 | import org.junit.Test; 25 | import org.junit.runner.RunWith; 26 | import org.springframework.test.context.ContextConfiguration; 27 | import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; 28 | import org.wltea.analyzer.lucene.IKAnalyzer; 29 | 30 | @RunWith(SpringJUnit4ClassRunner.class) 31 | @ContextConfiguration(locations = { "classpath*:/org/danielli/xultimate/lucene/applicationContext-service-lucene3.xml"}) 32 | public class NearRealtimeSearchTest3 { 33 | 34 | @Resource 35 | private TrackingIndexWriter trackingIndexWriter; 36 | 37 | @Resource 38 | private SearcherManager searcherManager; 39 | 40 | @Resource 41 | private IKAnalyzer ikAnalyzer; 42 | 43 | public void saveDocument(Document document) { 44 | try { 45 | trackingIndexWriter.addDocument(document); 46 | } catch (Exception e) { 47 | e.printStackTrace(); 48 | } 49 | } 50 | 51 | public void deleteDocumentById(Long id) { 52 | try { 53 | trackingIndexWriter.deleteDocuments(new Term("id", String.valueOf(id))); 54 | } catch (Exception e) { 55 | e.printStackTrace(); 56 | } 57 | } 58 | 59 | @Before 60 | public void before() { 61 | for (Document document : TestUtils.getDocuments()) { 62 | saveDocument(document); 63 | } 64 | } 65 | 66 | @Test 67 | public void test() throws InterruptedException, ParseException { 68 | /* Search */ 69 | // 未分词 + 排序 70 | BooleanQuery noAnalyzerBooleanQuery = new BooleanQuery(); 71 | noAnalyzerBooleanQuery.add(new TermQuery(new Term("noAnalyzer", "近实时搜索")), Occur.MUST); 72 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, Sort.RELEVANCE).size()); 73 | Thread.sleep(3 * 1000); 74 | // 未分词 + 排序 75 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, Sort.RELEVANCE).size()); 76 | // 删除 77 | deleteDocumentById(1L); 78 | // 未分词 + 排序 79 | System.out.println(TestUtils.getDocuments(searcherManager,
noAnalyzerBooleanQuery, Sort.RELEVANCE).size()); 80 | Thread.sleep(3 * 1000); 81 | // 未分词 + 排序 82 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, Sort.RELEVANCE).size()); 83 | // 分词 + 排序 84 | BooleanQuery analyzerBooleanQuery = new BooleanQuery(); 85 | analyzerBooleanQuery.add(new QueryParser(Version.LUCENE_45, "analyzer", ikAnalyzer).parse("近实时搜索"), Occur.MUST); 86 | System.out.println(TestUtils.getDocuments(searcherManager, analyzerBooleanQuery, Sort.RELEVANCE).size()); 87 | /* Filter + Collector */ 88 | // 自定义Filter + 自定义Collector 89 | System.out.println(TestUtils.getIdList(searcherManager, noAnalyzerBooleanQuery, new IdFilter(new Long[] { 3L, 5L })).size()); 90 | // Filter + 自定义Collector 91 | BooleanQuery booleanQuery = new BooleanQuery(); 92 | booleanQuery.add(new TermQuery(new Term("id", String.valueOf(3L))), Occur.SHOULD); 93 | booleanQuery.add(new TermQuery(new Term("id", String.valueOf(5L))), Occur.SHOULD); 94 | QueryWrapperFilter filter = new QueryWrapperFilter(booleanQuery); 95 | System.out.println(TestUtils.getIdList(searcherManager, noAnalyzerBooleanQuery, filter).size()); 96 | /* 排序 */ 97 | // ID倒排 98 | Sort idSort = new Sort(new SortField("id", Type.LONG, true)); 99 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(0).get("id")); 100 | // 自定义排序 101 | idSort = new Sort(new SortField("id", new FieldComparatorSource() { 102 | 103 | @Override 104 | public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { 105 | return new IdFieldComparator(numHits, fieldname, new long[] { 59L, 57L }); 106 | } 107 | })); 108 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(0).get("id")); 109 | System.out.println(TestUtils.getDocuments(searcherManager, noAnalyzerBooleanQuery, idSort).get(1).get("id")); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/IKSegmenter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0 3 | * IK Analyzer release 5.0 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | */ 24 | package org.wltea.analyzer.core; 25 | 26 | import java.io.IOException; 27 | import java.io.Reader; 28 | import java.util.ArrayList; 29 | import java.util.List; 30 | 31 | import org.wltea.analyzer.cfg.Configuration; 32 | import org.wltea.analyzer.cfg.DefaultConfig; 33 | import org.wltea.analyzer.dic.Dictionary; 34 | 35 | /** 36 | * IK分词器主类 37 | * 38 | */ 39 | public final class IKSegmenter { 40 | 41 | //字符串reader 42 | private Reader input; 43 | //分词器配置项 44 | private Configuration cfg; 45 | //分词器上下文 46 | private AnalyzeContext context; 47 | //分词处理器列表 48 | private List<ISegmenter> segmenters; 49 | //分词歧义裁决器 50 | private IKArbitrator arbitrator; 51 | 52 | 53 | /** 54 | * IK分词器构造函数 55 | * @param input 56 | * @param useSmart 为true,使用智能分词策略 57 | * 58 | * 非智能分词:细粒度输出所有可能的切分结果 59 | * 智能分词: 合并数词和量词,对分词结果进行歧义判断 60 | */ 61 | public IKSegmenter(Reader input , boolean useSmart){ 62 | this.input = input; 63 | this.cfg = DefaultConfig.getInstance(); 64 | this.cfg.setUseSmart(useSmart); 65 | this.init(); 66 | } 67 | 68 | /** 69 | * IK分词器构造函数 70 | * @param input 71 | * @param cfg 使用自定义的Configuration构造分词器 72 | * 73 | */ 74 | public IKSegmenter(Reader input , Configuration cfg){ 75 | this.input = input; 76 | this.cfg = cfg; 77 | this.init(); 78 | } 79 | 80 | /** 81 | * 初始化 82 | */ 83 | private void init(){ 84 | //初始化词典单例 85 | Dictionary.initial(this.cfg); 86 | //初始化分词上下文 87 | this.context = new AnalyzeContext(this.cfg); 88 | //加载子分词器 89 | this.segmenters = this.loadSegmenters(); 90 | //加载歧义裁决器 91 | this.arbitrator = new IKArbitrator(); 92 | } 93 | 94 | /** 95 | * 初始化词典,加载子分词器实现 96 | * @return List<ISegmenter> 97 | */ 98 | private List<ISegmenter> loadSegmenters(){ 99 | List<ISegmenter> segmenters = new ArrayList<ISegmenter>(4); 100 | //处理字母的子分词器 101 | segmenters.add(new LetterSegmenter()); 102 | //处理中文数量词的子分词器 103 | segmenters.add(new CN_QuantifierSegmenter()); 104 | //处理中文词的子分词器 105 | segmenters.add(new CJKSegmenter()); 106 | return segmenters; 107 | } 108 | 109 | /** 110 | * 分词,获取下一个词元 111 | * @return Lexeme 词元对象 112 | * @throws IOException 113 | */ 114 | public synchronized Lexeme next()throws IOException{ 115 | Lexeme l = null; 116 | while((l = context.getNextLexeme()) == null ){ 117 | /* 118 | * 从reader中读取数据,填充buffer 119 | * 如果reader是分次读入buffer的,那么buffer要 进行移位处理 120 | * 移位处理上次读入的但未处理的数据 121 | */ 122 | int available = context.fillBuffer(this.input); 123 | if(available <= 0){ 124 | //reader已经读完 125 | context.reset(); 126 | return null; 127 | 128 | }else{ 129 | //初始化指针 130 | context.initCursor(); 131 | do{ 132 | //遍历子分词器 133 | for(ISegmenter segmenter : segmenters){ 134 | segmenter.analyze(context); 135 | } 136 | //字符缓冲区接近读完,需要读入新的字符 137 | if(context.needRefillBuffer()){ 138 | break; 139 | } 140 | //向前移动指针 141 | }while(context.moveCursor()); 142 | //重置子分词器,为下轮循环进行初始化 143 | for(ISegmenter segmenter : segmenters){ 144 | segmenter.reset(); 145 | } 146 | } 147 | //对分词进行歧义处理 148 | this.arbitrator.process(context, this.cfg.useSmart()); 149 | //将分词结果输出到结果集,并处理未切分的单个CJK字符 150 | context.outputToResult(); 151 | //记录本次分词的缓冲区位移 152 | context.markBufferOffset(); 153 | } 154 | return l; 155 | } 156 | 157 | /** 158 | * 重置分词器到初始状态 159 | * @param input 160 | */ 161 | public synchronized void reset(Reader input) { 162 | this.input = input; 163 | context.reset(); 164 | for(ISegmenter segmenter : segmenters){ 165 | segmenter.reset(); 166 | } 167 | } 168 | } 169 |
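Because next() and reset(Reader) are synchronized and reset() reinitializes the context and sub-segmenters, one IKSegmenter instance can be reused across inputs; this is how IKTokenizer above recycles its _IKImplement field. A minimal reuse sketch (hypothetical demo class; the sample strings are arbitrary):

```java
package org.wltea.analyzer.sample;

import java.io.IOException;
import java.io.StringReader;

import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

// Hypothetical demo class, not in the original repo.
public class SegmenterReuseSketch {
    public static void main(String[] args) throws IOException {
        IKSegmenter seg = new IKSegmenter(new StringReader("近实时搜索"), true);
        while (seg.next() != null) {
            // drain the first input; next() returns null once the reader is exhausted
        }
        // reset() rewinds the analysis context, so the same instance can take new input.
        seg.reset(new StringReader("中文分词"));
        Lexeme l;
        while ((l = seg.next()) != null) {
            System.out.println(l.getLexemeText() + " [" + l.getBeginPosition() + "-" + l.getEndPosition() + "]");
        }
    }
}
```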
-------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/cfg/DefaultConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0 3 | * IK Analyzer release 5.0 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | * 25 | */ 26 | package org.wltea.analyzer.cfg; 27 | 28 | import java.io.IOException; 29 | import java.io.InputStream; 30 | import java.util.ArrayList; 31 | import java.util.InvalidPropertiesFormatException; 32 | import java.util.List; 33 | import java.util.Properties; 34 | 35 | /** 36 | * Configuration 默认实现 37 | * 2012-5-8 38 | * 39 | */ 40 | public class DefaultConfig implements Configuration{ 41 | 42 | /* 43 | * 分词器默认字典路径 44 | */ 45 | private static final String PATH_DIC_MAIN = "org/wltea/analyzer/dic/main2012.dic"; 46 | private static final String PATH_DIC_QUANTIFIER = "org/wltea/analyzer/dic/quantifier.dic"; 47 | 48 | /* 49 | * 分词器配置文件路径 50 | */ 51 | private static final String FILE_NAME = "IKAnalyzer.cfg.xml"; 52 | //配置属性——扩展字典 53 | private static final String EXT_DICT = "ext_dict"; 54 | //配置属性——扩展停止词典 55 | private static final String EXT_STOP = "ext_stopwords"; 56 | 57 | private Properties props; 58 | /* 59 | * 是否使用smart方式分词 60 | */ 61 | private boolean useSmart; 62 | 63 | /** 64 | * 返回单例 65 | * @return Configuration单例 66 | */ 67 | public static Configuration getInstance(){ 68 | return new DefaultConfig(); 69 | } 70 | 71 | /* 72 | * 初始化配置文件 73 | */ 74 | private DefaultConfig(){ 75 | props = new Properties(); 76 | 77 | InputStream input = this.getClass().getClassLoader().getResourceAsStream(FILE_NAME); 78 | if(input != null){ 79 | try { 80 | props.loadFromXML(input); 81 | } catch (InvalidPropertiesFormatException e) { 82 | e.printStackTrace(); 83 | } catch (IOException e) { 84 | e.printStackTrace(); 85 | } 86 | } 87 | } 88 | 89 | 90 | /** 91 | * 返回useSmart标志位 92 | * useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分 93 | * @return useSmart 94 | */ 95 | public boolean useSmart() { 96 | return useSmart; 97 | } 98 | 99 | /** 100 | * 设置useSmart标志位 101 | * useSmart =true ,分词器使用智能切分策略, =false则使用细粒度切分 102 | * @param useSmart 103 | */ 104 | public void setUseSmart(boolean useSmart) { 105 | this.useSmart = useSmart; 106 | } 107 | 108 | /** 109 | * 获取主词典路径 110 | * 111 | * @return String 主词典路径 112 | */ 113 | public String getMainDictionary(){ 114 | return PATH_DIC_MAIN; 115 | } 116 | 117 | /** 118 | * 获取量词词典路径 119 | * @return String 量词词典路径 120 | */ 121 | public String getQuantifierDicionary(){ 122 | return PATH_DIC_QUANTIFIER; 
123 | } 124 | 125 | /** 126 | * 获取扩展字典配置路径 127 | * @return List<String> 相对类加载器的路径 128 | */ 129 | public List<String> getExtDictionarys(){ 130 | List<String> extDictFiles = new ArrayList<String>(2); 131 | String extDictCfg = props.getProperty(EXT_DICT); 132 | if(extDictCfg != null){ 133 | //使用;分割多个扩展字典配置 134 | String[] filePaths = extDictCfg.split(";"); 135 | if(filePaths != null){ 136 | for(String filePath : filePaths){ 137 | if(filePath != null && !"".equals(filePath.trim())){ 138 | extDictFiles.add(filePath.trim()); 139 | } 140 | } 141 | } 142 | } 143 | return extDictFiles; 144 | } 145 | 146 | 147 | /** 148 | * 获取扩展停止词典配置路径 149 | * @return List<String> 相对类加载器的路径 150 | */ 151 | public List<String> getExtStopWordDictionarys(){ 152 | List<String> extStopWordDictFiles = new ArrayList<String>(2); 153 | String extStopWordDictCfg = props.getProperty(EXT_STOP); 154 | if(extStopWordDictCfg != null){ 155 | //使用;分割多个扩展字典配置 156 | String[] filePaths = extStopWordDictCfg.split(";"); 157 | if(filePaths != null){ 158 | for(String filePath : filePaths){ 159 | if(filePath != null && !"".equals(filePath.trim())){ 160 | extStopWordDictFiles.add(filePath.trim()); 161 | } 162 | } 163 | } 164 | } 165 | return extStopWordDictFiles; 166 | } 167 | 168 | 169 | } 170 | -------------------------------------------------------------------------------- /pom.xml: --------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.danielli.xultimate</groupId>
  <artifactId>xultimate-searching</artifactId>
  <version>1.0.0-SNAPSHOT</version>
  <packaging>pom</packaging>

  <name>The X-Ultimate Searching Parent</name>
  <description>The X-Ultimate Searching parent project.</description>
  <url>https://github.com/daniellitoc/xultimate-searching/</url>

  <modules>
    <module>xultimate-lucene</module>
    <module>xultimate-ikanalyzer</module>
    <module>xultimate-solr</module>
  </modules>

  <properties>
    <jdk.version>1.7</jdk.version>
    <junit.version>4.11</junit.version>
    <org.springframework.version>3.2.3.RELEASE</org.springframework.version>
    <org.apache.lucene.version>4.5.0</org.apache.lucene.version>
    <org.apache.solr.version>4.5.0</org.apache.solr.version>
  </properties>

  <dependencyManagement>
    <dependencies>
      <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>${org.apache.lucene.version}</version>
      </dependency>
      <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>${org.apache.lucene.version}</version>
      </dependency>
      <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-common</artifactId>
        <version>${org.apache.lucene.version}</version>
      </dependency>
      <dependency>
        <groupId>org.apache.solr</groupId>
        <artifactId>solr-solrj</artifactId>
        <version>${org.apache.solr.version}</version>
        <exclusions>
          <exclusion>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
          </exclusion>
        </exclusions>
      </dependency>
      <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>${junit.version}</version>
        <scope>test</scope>
      </dependency>
      <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-test</artifactId>
        <version>${org.springframework.version}</version>
        <scope>test</scope>
      </dependency>
    </dependencies>
  </dependencyManagement>

  <build>
    <resources>
      <resource>
        <directory>src/main/resources</directory>
        <filtering>true</filtering>
      </resource>
    </resources>
    <testResources>
      <testResource>
        <directory>src/test/resources</directory>
        <filtering>true</filtering>
      </testResource>
    </testResources>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>${jdk.version}</source>
          <target>${jdk.version}</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-resources-plugin</artifactId>
        <configuration>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
        <executions>
          <execution>
            <id>attach-sources</id>
            <phase>verify</phase>
            <goals>
              <goal>jar-no-fork</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-javadoc-plugin</artifactId>
        <executions>
          <execution>
            <id>attach-javadocs</id>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>2.2</version>
        <executions>
          <execution>
            <goals>
              <goal>test-jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <distributionManagement>
    <repository>
      <id>nexus-releases</id>
      <name>Nexus Release Repository</name>
      <url>http://localhost/nexus/content/repositories/releases</url>
    </repository>
    <snapshotRepository>
      <id>nexus-snapshots</id>
      <name>Nexus Snapshots Repository</name>
      <url>http://localhost/nexus/content/repositories/snapshots</url>
    </snapshotRepository>
  </distributionManagement>
</project>
--------------------------------------------------------------------------------
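Since DefaultConfig above reads IKAnalyzer.cfg.xml via Properties.loadFromXML(), the config file follows the java.util.Properties XML format, with ext_dict and ext_stopwords entries holding ';'-separated classpath-relative paths. A minimal sketch of loading and inspecting the configuration (hypothetical demo class; the entry values shown in the comment are illustrative):

```java
package org.wltea.analyzer.sample;

import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.cfg.DefaultConfig;

// Hypothetical demo class, not in the original repo.
// Expected IKAnalyzer.cfg.xml shape (java.util.Properties XML format):
//   <?xml version="1.0" encoding="UTF-8"?>
//   <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
//   <properties>
//     <entry key="ext_dict">ext.dic</entry>
//     <entry key="ext_stopwords">stopword.dic</entry>
//   </properties>
public class ConfigSketch {
    public static void main(String[] args) {
        Configuration cfg = DefaultConfig.getInstance();
        cfg.setUseSmart(true);
        // The dictionary paths are resolved against the classloader by the Dictionary singleton.
        System.out.println("main dict: " + cfg.getMainDictionary());
        System.out.println("ext dicts: " + cfg.getExtDictionarys());
        System.out.println("stopword dicts: " + cfg.getExtStopWordDictionarys());
    }
}
```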
/xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0 3 | * IK Analyzer release 5.0 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * 20 | * 源代码由林良益(linliangyi2005@gmail.com)提供 21 | * 版权声明 2012,乌龙茶工作室 22 | * provided by Linliangyi and copyright 2012 by Oolong studio 23 | * 24 | */ 25 | package org.wltea.analyzer.query; 26 | 27 | import java.io.IOException; 28 | import java.io.StringReader; 29 | import java.util.ArrayList; 30 | import java.util.List; 31 | 32 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 33 | import org.apache.lucene.queryparser.classic.ParseException; 34 | import org.apache.lucene.queryparser.classic.QueryParser; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.util.Version; 37 | import org.wltea.analyzer.core.IKSegmenter; 38 | import org.wltea.analyzer.core.Lexeme; 39 | 40 | /** 41 | * Single Word Multi Char Query Builder 42 | * IK分词算法专用 43 | * @author linliangyi 44 | * 45 | */ 46 | public class SWMCQueryBuilder { 47 | 48 | /** 49 | * 生成SWMCQuery 50 | * @param fieldName 51 | * @param keywords 52 | * @param quickMode 53 | * @return Lucene Query 54 | */ 55 | public static Query create(String fieldName ,String keywords , boolean quickMode){ 56 | if(fieldName == null || keywords == null){ 57 | throw new IllegalArgumentException("参数 fieldName 、 keywords 不能为null."); 58 | } 59 | //1.对keywords进行分词处理 60 | List<Lexeme> lexemes = doAnalyze(keywords); 61 | //2.根据分词结果,生成SWMCQuery 62 | Query _SWMCQuery = getSWMCQuery(fieldName , lexemes , quickMode); 63 | return _SWMCQuery; 64 | } 65 | 66 | /** 67 | * 分词切分,并返回词元链表 68 | * @param keywords 69 | * @return 70 | */ 71 | private static List<Lexeme> doAnalyze(String keywords){ 72 | List<Lexeme> lexemes = new ArrayList<Lexeme>(); 73 | IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , true); 74 | try{ 75 | Lexeme l = null; 76 | while( (l = ikSeg.next()) != null){ 77 | lexemes.add(l); 78 | } 79 | }catch(IOException e){ 80 | e.printStackTrace(); 81 | } 82 | return lexemes; 83 | } 84 | 85 | 86 | /** 87 | * 根据分词结果生成SWMC搜索 88 | * @param fieldName 89 | * @param lexemes 90 | * @param quickMode 91 | * @return 92 | */ 93 | private static Query getSWMCQuery(String fieldName , List<Lexeme> lexemes , boolean quickMode){ 94 | //构造SWMC的查询表达式 95 | StringBuffer keywordBuffer = new StringBuffer(); 96 | //精简的SWMC的查询表达式 97 | StringBuffer keywordBuffer_Short = new StringBuffer(); 98 | //记录最后词元长度 99 | int lastLexemeLength = 0; 100 | //记录最后词元结束位置 101 | int lastLexemeEnd = -1; 102 | 103 | int shortCount = 0; 104 | int totalCount = 0; 105 | for(Lexeme l : lexemes){ 106 | totalCount += l.getLength(); 107 | //精简表达式 108 |
if(l.getLength() > 1){ 109 | keywordBuffer_Short.append(' ').append(l.getLexemeText()); 110 | shortCount += l.getLength(); 111 | } 112 | 113 | if(lastLexemeLength == 0){ 114 | keywordBuffer.append(l.getLexemeText()); 115 | }else if(lastLexemeLength == 1 && l.getLength() == 1 116 | && lastLexemeEnd == l.getBeginPosition()){//单字位置相邻,长度为一,合并) 117 | keywordBuffer.append(l.getLexemeText()); 118 | }else{ 119 | keywordBuffer.append(' ').append(l.getLexemeText()); 120 | 121 | } 122 | lastLexemeLength = l.getLength(); 123 | lastLexemeEnd = l.getEndPosition(); 124 | } 125 | 126 | //借助lucene queryparser 生成SWMC Query 127 | QueryParser qp = new QueryParser(Version.LUCENE_45, fieldName, new StandardAnalyzer(Version.LUCENE_45)); 128 | qp.setDefaultOperator(QueryParser.AND_OPERATOR); 129 | qp.setAutoGeneratePhraseQueries(true); 130 | 131 | if(quickMode && (shortCount * 1.0f / totalCount) > 0.5f){ 132 | try { 133 | //System.out.println(keywordBuffer.toString()); 134 | Query q = qp.parse(keywordBuffer_Short.toString()); 135 | return q; 136 | } catch (ParseException e) { 137 | e.printStackTrace(); 138 | } 139 | 140 | }else{ 141 | if(keywordBuffer.length() > 0){ 142 | try { 143 | //System.out.println(keywordBuffer.toString()); 144 | Query q = qp.parse(keywordBuffer.toString()); 145 | return q; 146 | } catch (ParseException e) { 147 | e.printStackTrace(); 148 | } 149 | } 150 | } 151 | return null; 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /xultimate-ikanalyzer/src/test/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java: -------------------------------------------------------------------------------- 1 | /** 2 | * IK 中文分词 版本 5.0 3 | * IK Analyzer release 5.0 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one or more 6 | * contributor license agreements. See the NOTICE file distributed with 7 | * this work for additional information regarding copyright ownership. 8 | * The ASF licenses this file to You under the Apache License, Version 2.0 9 | * (the "License"); you may not use this file except in compliance with 10 | * the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/test/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java:
--------------------------------------------------------------------------------
/*
 * IK Analyzer release 5.0. Apache License, Version 2.0 (see the header in
 * SWMCQueryBuilder.java above). Source code provided by Lin Liangyi
 * (linliangyi2005@gmail.com); copyright 2012, Oolong Studio.
 */
package org.wltea.analyzer.sample;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demo of Lucene indexing and search using IKAnalyzer
 * 2012-3-2
 *
 * Written against the Lucene 4.x API.
 */
public class LuceneIndexAndSearchDemo {

    /**
     * Simulation: creates an index holding a single document and searches it.
     * @param args
     */
    public static void main(String[] args) {
        // Lucene document field name
        String fieldName = "text";
        // Content to index
        String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";

        // Instantiate the IKAnalyzer tokenizer (smart mode)
        Analyzer analyzer = new IKAnalyzer(true);

        Directory directory = null;
        IndexWriter iwriter = null;
        IndexReader ireader = null;
        IndexSearcher isearcher = null;
        try {
            // Build an in-memory index
            directory = new RAMDirectory();

            // Configure the IndexWriter
            IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_45, analyzer);
            iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
            iwriter = new IndexWriter(directory, iwConfig);
            // Write the index
            Document doc = new Document();
            doc.add(new StringField("ID", "10000", Field.Store.YES));
            doc.add(new TextField(fieldName, text, Field.Store.YES));
            iwriter.addDocument(doc);
            iwriter.close();

            // Search **********************************
            // Instantiate the searcher
            ireader = DirectoryReader.open(directory);
            isearcher = new IndexSearcher(ireader);

            String keyword = "中文分词工具包";
            // Build the Query with the QueryParser
            QueryParser qp = new QueryParser(Version.LUCENE_45, fieldName, analyzer);
            qp.setDefaultOperator(QueryParser.AND_OPERATOR);
            Query query = qp.parse(keyword);
            System.out.println("Query = " + query);

            // Fetch the 5 highest-scoring documents
            TopDocs topDocs = isearcher.search(query, 5);
            System.out.println("Hits: " + topDocs.totalHits);
            // Print the results. Iterate over scoreDocs rather than totalHits:
            // at most 5 documents were returned, so the original loop bound of
            // totalHits could run past the end of the array.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (int i = 0; i < scoreDocs.length; i++) {
                Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                System.out.println("Content: " + targetDoc.toString());
            }

        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
--------------------------------------------------------------------------------
/xultimate-solr/src/test/resources/applicationContext-service-solr-client.xml:
--------------------------------------------------------------------------------
[XML content not preserved in this dump. Per the README and SolrServerTest below, this file defines the Spring beans for the SolrJ clients: httpSolrServer, concurrentUpdateSolrServer, lbHttpSolrServer, and cloudSolrServer.]
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/main/resources/applicationContext-dao-generic.xml:
--------------------------------------------------------------------------------
[XML content not preserved in this dump. Spring data-source and MyBatis DAO wiring; the only surviving fragments are the property keys datasource.solrDb.jdbc.password and datasource.solrDb.jdbc.username.]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# xultimate-searching #

A search service built on IKAnalyzer and Solr. Dictionaries are maintained in a database instead of in files.

## xultimate-solr ##

* SolrCloud-based showcases for SolrJ, including Spring configurations for HttpSolrServer, ConcurrentUpdateSolrServer, LBHttpSolrServer, and CloudSolrServer.

#### SolrCloud: create a collection with automatic sharding, then assign replicas manually ####

    http://192.168.2.150:8080/solr/admin/collections?action=CREATE&name=mycollection&numShards=4&replicationFactor=1&maxShardsPerNode=4
    // Manually create the replicas for 192.168.2.150. Assume shard1 lives on 192.168.1.150
    http://192.168.2.151:8080/solr/admin/cores?action=CREATE&name=mycollection_shard1_replica_2&collection=mycollection&shard=shard1
    http://192.168.2.152:8080/solr/admin/cores?action=CREATE&name=mycollection_shard1_replica_3&collection=mycollection&shard=shard1
    http://192.168.2.153:8080/solr/admin/cores?action=CREATE&name=mycollection_shard1_replica_4&collection=mycollection&shard=shard1
    // Manually create the replicas for 192.168.2.153. Assume shard2 lives on 192.168.1.153
    http://192.168.2.150:8080/solr/admin/cores?action=CREATE&name=mycollection_shard2_replica_2&collection=mycollection&shard=shard2
    http://192.168.2.151:8080/solr/admin/cores?action=CREATE&name=mycollection_shard2_replica_3&collection=mycollection&shard=shard2
    http://192.168.2.152:8080/solr/admin/cores?action=CREATE&name=mycollection_shard2_replica_4&collection=mycollection&shard=shard2
    // Manually create the replicas for 192.168.2.152. Assume shard3 lives on 192.168.1.152
    http://192.168.2.150:8080/solr/admin/cores?action=CREATE&name=mycollection_shard3_replica_2&collection=mycollection&shard=shard3
    http://192.168.2.151:8080/solr/admin/cores?action=CREATE&name=mycollection_shard3_replica_3&collection=mycollection&shard=shard3
    http://192.168.2.153:8080/solr/admin/cores?action=CREATE&name=mycollection_shard3_replica_4&collection=mycollection&shard=shard3
    // Manually create the replicas for 192.168.2.151. Assume shard4 lives on 192.168.1.151
    http://192.168.2.150:8080/solr/admin/cores?action=CREATE&name=mycollection_shard4_replica_2&collection=mycollection&shard=shard4
    http://192.168.2.152:8080/solr/admin/cores?action=CREATE&name=mycollection_shard4_replica_3&collection=mycollection&shard=shard4
    http://192.168.2.153:8080/solr/admin/cores?action=CREATE&name=mycollection_shard4_replica_4&collection=mycollection&shard=shard4

#### SolrCloud: delete a collection ####

    http://192.168.2.150:8080/solr/admin/collections?action=DELETE&name=mycollection

#### SolrCloud: create a collection with manual sharding (all numShards shards must be created, or the collection is unusable) ####

    http://192.168.2.150:8080/solr/admin/cores?action=CREATE&name=mycollection2_shard1_replica_1&collection=mycollection2&shard=shard1&numShards=4
    http://192.168.2.151:8080/solr/admin/cores?action=CREATE&name=mycollection2_shard2_replica_1&collection=mycollection2&shard=shard2
    http://192.168.2.152:8080/solr/admin/cores?action=CREATE&name=mycollection2_shard3_replica_1&collection=mycollection2&shard=shard3
    http://192.168.2.153:8080/solr/admin/cores?action=CREATE&name=mycollection2_shard4_replica_1&collection=mycollection2&shard=shard4

#### SolrCloud: split an existing shard ####

    http://192.168.2.150:8080/solr/admin/collections?action=CREATE&name=mycollection3&numShards=1&replicationFactor=1&maxShardsPerNode=4

or

    http://192.168.2.150:8080/solr/admin/cores?action=CREATE&name=mycollection3_shard1_replica_1&collection=mycollection3&shard=shard1&numShards=1
    http://192.168.2.150:8080/solr/admin/collections?action=SPLITSHARD&collection=mycollection3&shard=shard1  // Only automatically created shards can be split.
    http://192.168.2.153:8080/solr/admin/cores?action=UNLOAD&core=mycollection3_shard1_replica1  // mycollection3_shard1_replica1 lives on 192.168.2.153.

## xultimate-ikanalyzer ##

* Extends the Dictionary class with hooks for stopwords and quantifiers.
* ExtKeywordInitializer maintains the IKAnalyzer extension dictionary through a database.
* StopKeywordInitializer maintains the IKAnalyzer stopword dictionary through a database.
* SynonymFilterFactory maintains the Solr synonym dictionary through a database.
* IKTokenizerFactory works around IKAnalyzer's inability to apply useSmart at query time inside Solr.

#### Loading extension words, stopwords, and synonyms from files ####

* Export the project as a jar and copy it to solr/WEB-INF/lib/.
* Copy ext.dic, IKAnalyzer.cfg.xml, and stopword.dic from src/test/resources to solr/WEB-INF/classes/.
* Edit $SOLR_HOME/collection1/conf/synonyms.txt.
* Edit $SOLR_HOME/collection1/conf/schema.xml and add the field type:

    [fieldType definition not preserved in this dump]

#### Loading extension words, stopwords, and synonyms from a database ####

* Export the project as a jar and copy it to solr/WEB-INF/lib/.
* Copy the dependency jars of xultimate-ikanalyzer to solr/WEB-INF/lib/.
* Remove slf4j-log4j12-1.6.6.jar, jcl-over-slf4j-1.6.6.jar, jul-to-slf4j-1.6.6.jar, log4j-1.2.16.jar, slf4j-api-1.6.6.jar, commons-lang-2.4.jar, and log4j.properties.
* Copy databases.properties from src/test/resources to solr/WEB-INF/classes/.
* Edit solr/WEB-INF/web.xml and add:

    <context-param>
        <param-name>contextConfigLocation</param-name>
        <param-value>classpath:applicationContext-service-config.xml, classpath:applicationContext-service-crypto.xml, classpath:applicationContext-dao-base.xml, classpath:applicationContext-dao-generic.xml, classpath:applicationContext-service-generic.xml</param-value>
    </context-param>
    <listener>
        <listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
    </listener>

* Edit $SOLR_HOME/collection1/conf/schema.xml and add the field type:

    [fieldType definition not preserved in this dump]

## xultimate-lucene ##

* Three showcases of Lucene near-real-time search, covering a custom Collector, a custom Filter, and a custom Sort.
* Beans are managed through Spring.
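For a quick smoke test against a cluster like the one above, a minimal SolrJ sketch of the CloudSolrServer client that the showcases wire up through Spring. The ZooKeeper address and collection name are illustrative assumptions, not project configuration:

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.impl.CloudSolrServer;
    import org.apache.solr.client.solrj.response.QueryResponse;

    public class CloudClientSketch {
        public static void main(String[] args) throws Exception {
            // Assumed ZooKeeper ensemble address.
            CloudSolrServer server = new CloudSolrServer("192.168.2.150:2181");
            server.setDefaultCollection("mycollection");
            QueryResponse rsp = server.query(new SolrQuery("*:*"));
            System.out.println("numFound = " + rsp.getResults().getNumFound());
            server.shutdown();
        }
    }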
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/QuickSortSet.java:
--------------------------------------------------------------------------------
/*
 * IK Analyzer release 5.0. Apache License, Version 2.0 (see the header in
 * SWMCQueryBuilder.java above). Source code provided by Lin Liangyi
 * (linliangyi2005@gmail.com); copyright 2012, Oolong Studio.
 */
package org.wltea.analyzer.core;

/**
 * Sorted lexeme set dedicated to the IK tokenizer
 */
class QuickSortSet {
    // Head of the linked list
    private Cell head;
    // Tail of the linked list
    private Cell tail;
    // Actual size of the linked list
    private int size;

    QuickSortSet() {
        this.size = 0;
    }

    /**
     * Adds a lexeme to the set.
     * @param lexeme
     */
    boolean addLexeme(Lexeme lexeme) {
        Cell newCell = new Cell(lexeme);
        if (this.size == 0) {
            this.head = newCell;
            this.tail = newCell;
            this.size++;
            return true;

        } else {
            if (this.tail.compareTo(newCell) == 0) { // equal to the tail lexeme; not added
                return false;

            } else if (this.tail.compareTo(newCell) < 0) { // append at the tail
                this.tail.next = newCell;
                newCell.prev = this.tail;
                this.tail = newCell;
                this.size++;
                return true;

            } else if (this.head.compareTo(newCell) > 0) { // insert at the head
                this.head.prev = newCell;
                newCell.next = this.head;
                this.head = newCell;
                this.size++;
                return true;

            } else {
                // Walk backwards from the tail
                Cell index = this.tail;
                while (index != null && index.compareTo(newCell) > 0) {
                    index = index.prev;
                }
                if (index.compareTo(newCell) == 0) { // duplicate of an existing lexeme; not added
                    return false;

                } else if (index.compareTo(newCell) < 0) { // insert at some position inside the list
                    newCell.prev = index;
                    newCell.next = index.next;
                    index.next.prev = newCell;
                    index.next = newCell;
                    this.size++;
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns the head element without removing it.
     */
    Lexeme peekFirst() {
        if (this.head != null) {
            return this.head.lexeme;
        }
        return null;
    }

    /**
     * Removes and returns the first element of the set.
     * @return Lexeme
     */
    Lexeme pollFirst() {
        if (this.size == 1) {
            Lexeme first = this.head.lexeme;
            this.head = null;
            this.tail = null;
            this.size--;
            return first;
        } else if (this.size > 1) {
            Lexeme first = this.head.lexeme;
            this.head = this.head.next;
            this.size--;
            return first;
        } else {
            return null;
        }
    }

    /**
     * Returns the tail element without removing it.
     */
    Lexeme peekLast() {
        if (this.tail != null) {
            return this.tail.lexeme;
        }
        return null;
    }

    /**
     * Removes and returns the last element of the set.
     * @return Lexeme
     */
    Lexeme pollLast() {
        if (this.size == 1) {
            Lexeme last = this.head.lexeme;
            this.head = null;
            this.tail = null;
            this.size--;
            return last;

        } else if (this.size > 1) {
            Lexeme last = this.tail.lexeme;
            this.tail = this.tail.prev;
            this.size--;
            return last;

        } else {
            return null;
        }
    }

    /**
     * Returns the size of the set.
     */
    int size() {
        return this.size;
    }

    /**
     * Checks whether the set is empty.
     */
    boolean isEmpty() {
        return this.size == 0;
    }

    /**
     * Returns the head cell of the lexeme chain.
     */
    Cell getHead() {
        return this.head;
    }
    /**
     * QuickSortSet cell (linked-list node).
     */
    class Cell implements Comparable<Cell> {
        private Cell prev;
        private Cell next;
        private Lexeme lexeme;

        Cell(Lexeme lexeme) {
            if (lexeme == null) {
                throw new IllegalArgumentException("lexeme must not be null");
            }
            this.lexeme = lexeme;
        }

        public int compareTo(Cell o) {
            return this.lexeme.compareTo(o.lexeme);
        }

        public Cell getPrev() {
            return this.prev;
        }

        public Cell getNext() {
            return this.next;
        }

        public Lexeme getLexeme() {
            return this.lexeme;
        }
    }
}
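Note: a minimal sketch of the ordering and deduplication behavior. It is hypothetical, assumes it is compiled inside the org.wltea.analyzer.core package (QuickSortSet and its addLexeme method are package-private), and uses the public Lexeme constructor defined later in this dump:

    package org.wltea.analyzer.core;

    public class QuickSortSetSketch {
        public static void main(String[] args) {
            QuickSortSet set = new QuickSortSet();
            // Lexemes sort by begin position first, then by length (longer first).
            set.addLexeme(new Lexeme(0, 2, 2, Lexeme.TYPE_CNWORD));
            set.addLexeme(new Lexeme(0, 0, 2, Lexeme.TYPE_CNWORD));
            set.addLexeme(new Lexeme(0, 0, 2, Lexeme.TYPE_CNWORD)); // duplicate; rejected
            System.out.println(set.size());                  // 2
            System.out.println(set.pollFirst().getBegin());  // 0
            System.out.println(set.pollFirst().getBegin());  // 2
        }
    }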
--------------------------------------------------------------------------------
/xultimate-solr/src/test/java/org/danielli/xultimate/solr/SolrServerTest.java:
--------------------------------------------------------------------------------
package org.danielli.xultimate.solr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import javax.annotation.Resource;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.beans.Field;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.danielli.xultimate.util.math.RandomNumberUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = { "classpath:/applicationContext-service-solr-client.xml" })
public class SolrServerTest {

    // @Resource(name = "httpSolrServer")
    // @Resource(name = "lbHttpSolrServer")
    @Resource(name = "cloudSolrServer")
    private SolrServer querySolrServer;

    // @Resource(name = "concurrentUpdateSolrServer")
    // @Resource(name = "lbHttpSolrServer")
    @Resource(name = "cloudSolrServer")
    private SolrServer updateSolrServer;

    @Test
    public void testAddDocument() throws SolrServerException, IOException {
        addDocument();
        queryReturnDocument();
        deleteAll();
        queryReturnDocument();
    }

    private void addDocument() throws SolrServerException, IOException {
        List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
        for (int i = 0; i < 100; i++) {
            SolrInputDocument document = new SolrInputDocument();
            document.addField("id", "id" + RandomNumberUtils.nextInt(1000), 1.0f);
            document.addField("name", "doc" + i, 1.0f);
            document.addField("price", RandomNumberUtils.nextInt(1000));
            solrInputDocuments.add(document);
        }

        updateSolrServer.add(solrInputDocuments); // Add the documents to Solr.
        updateSolrServer.commit(); // Do a commit, wait flush, wait searcher.
    }

    public void deleteAll() throws SolrServerException, IOException {
        updateSolrServer.deleteByQuery("*:*"); // CAUTION: deletes everything!
        updateSolrServer.commit();
    }

    private void queryReturnDocument() throws SolrServerException {
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        query.addSort("price", SolrQuery.ORDER.asc);
        QueryResponse rsp = querySolrServer.query(query);
        SolrDocumentList documentList = rsp.getResults();
        for (SolrDocument resultDoc : documentList) {
            String id = (String) resultDoc.getFieldValue("id"); // id is the uniqueKey field
            System.out.println(id);
        }
        System.out.println("Found " + documentList.getNumFound() + " documents");
    }

    // @Test
    public void testAddBean() throws SolrServerException, IOException {
        addBean();
        queryReturnEntity();
        deleteAll();
        queryReturnEntity();
    }

    private void addBean() throws IOException, SolrServerException {
        Item item1 = new Item();
        item1.setId("id" + RandomNumberUtils.nextInt(1000));
        item1.setFeatures(Arrays.asList("aaa", "bbb", "ccc"));

        Item item2 = new Item();
        item2.setId("id" + RandomNumberUtils.nextInt(1000));
        item2.setFeatures(Arrays.asList("ddd", "eee", "fff"));

        updateSolrServer.addBeans(Arrays.asList(item1, item2)); // Add the beans to Solr
        updateSolrServer.commit(); // Do a commit
    }

    public static class Item {
        @Field
        private String id;

        @Field("cat")
        private String[] categories;

        @Field
        private List<String> features;

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String[] getCategories() {
            return categories;
        }

        // The @Field annotation can also be applied to setter methods
        // @Field("cat")
        public void setCategories(String[] categories) {
            this.categories = categories;
        }

        public List<String> getFeatures() {
            return features;
        }

        public void setFeatures(List<String> features) {
            this.features = features;
        }
    }

    private void queryReturnEntity() throws SolrServerException {
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        query.addSort("price", SolrQuery.ORDER.asc);
        QueryResponse rsp = querySolrServer.query(query);
        List<Item> beans = rsp.getBeans(Item.class);
        for (Item bean : beans) {
            String id = bean.getId(); // id is the uniqueKey field
            System.out.println(id);
        }
    }

    // @Test
    public void testQueryHighlighting() throws SolrServerException, IOException {
        addBean();
        queryHighlighting();
        deleteAll();
        queryHighlighting();
    }

    private void queryHighlighting() throws SolrServerException {
        SolrQuery query = new SolrQuery();
        query.setQuery("bbb");

        query.setHighlight(true).setHighlightSnippets(1); // set other params as needed
        query.setParam("hl.fl", "features"); // set the highlighted field
        QueryResponse queryResponse = querySolrServer.query(query);

        SolrDocumentList documentList = queryResponse.getResults();
        for (SolrDocument resultDoc : documentList) {
            Collection<Object> features = resultDoc.getFieldValues("features");
            String id = (String) resultDoc.getFieldValue("id"); // id is the uniqueKey field
            if (queryResponse.getHighlighting().get(id) != null) {
                List<String> highlightSnippets = queryResponse.getHighlighting().get(id).get("features");
                System.out.println(Arrays.toString(highlightSnippets.toArray()));
            } else {
                System.out.println(Arrays.toString(features.toArray()));
            }
        }

        List<Item> beans = queryResponse.getBeans(Item.class);
        for (Item bean : beans) {
            String id = bean.getId();
            List<String> features = bean.getFeatures();
            if (queryResponse.getHighlighting().get(id) != null) {
                List<String> highlightSnippets = queryResponse.getHighlighting().get(id).get("features");
                System.out.println(Arrays.toString(highlightSnippets.toArray()));
            } else {
                System.out.println(Arrays.toString(features.toArray()));
            }
        }
    }
}
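Note: the commented-out @Resource names in the test hint at the alternative SolrJ clients. A minimal sketch of ConcurrentUpdateSolrServer for bulk updates; the URL, queue size, and thread count are illustrative assumptions, and the project injects its client via Spring instead:

    import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer;
    import org.apache.solr.common.SolrInputDocument;

    public class ConcurrentUpdateSketch {
        public static void main(String[] args) throws Exception {
            // Buffers up to 100 documents and sends them with 4 background threads.
            ConcurrentUpdateSolrServer updater =
                    new ConcurrentUpdateSolrServer("http://192.168.2.150:8080/solr/mycollection", 100, 4);
            SolrInputDocument doc = new SolrInputDocument();
            doc.addField("id", "id1");
            updater.add(doc);              // queued, sent asynchronously
            updater.commit();
            updater.blockUntilFinished();  // wait for the queue to drain
            updater.shutdown();
        }
    }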
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/CN_QuantifierSegmenter.java:
--------------------------------------------------------------------------------
/*
 * IK Analyzer release 5.0. Apache License, Version 2.0 (see the header in
 * SWMCQueryBuilder.java above). Source code provided by Lin Liangyi
 * (linliangyi2005@gmail.com); copyright 2012, Oolong Studio.
 */
package org.wltea.analyzer.core;

import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;

/**
 * Sub-segmenter for Chinese numerals and quantifiers
 */
class CN_QuantifierSegmenter implements ISegmenter {

    // Sub-segmenter label
    static final String SEGMENTER_NAME = "QUAN_SEGMENTER";

    // Chinese numeral characters
    private static String Chn_Num = "一二两三四五六七八九十零壹贰叁肆伍陆柒捌玖拾百千万亿拾佰仟萬億兆卅廿"; // Cnum
    private static Set<Character> ChnNumberChars = new HashSet<Character>();
    static {
        char[] ca = Chn_Num.toCharArray();
        for (char nChar : ca) {
            ChnNumberChars.add(nChar);
        }
    }

    /*
     * Start position of the numeral lexeme; doubles as the segmenter state flag:
     * nStart > -1 means this segmenter is currently consuming characters.
     */
    private int nStart;
    /*
     * End position of the numeral lexeme:
     * where the last valid numeral character in the lexeme ends.
     */
    private int nEnd;

    // Queue of quantifier hits still being matched
    private List<Hit> countHits;

    CN_QuantifierSegmenter() {
        nStart = -1;
        nEnd = -1;
        this.countHits = new LinkedList<Hit>();
    }

    /**
     * Segmentation entry point.
     */
    public void analyze(AnalyzeContext context) {
        // Process Chinese numerals
        this.processCNumber(context);
        // Process Chinese quantifiers
        this.processCount(context);

        // Decide whether the buffer must stay locked
        if (this.nStart == -1 && this.nEnd == -1 && countHits.isEmpty()) {
            // Unlock the buffer
            context.unlockBuffer(SEGMENTER_NAME);
        } else {
            context.lockBuffer(SEGMENTER_NAME);
        }
    }

    /**
     * Resets the segmenter state.
     */
    public void reset() {
        nStart = -1;
        nEnd = -1;
        countHits.clear();
    }

    /**
     * Processes Chinese numerals.
     */
    private void processCNumber(AnalyzeContext context) {
        if (nStart == -1 && nEnd == -1) { // initial state
            if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()
                    && ChnNumberChars.contains(context.getCurrentChar())) {
                // Record the start and end positions of the numeral
                nStart = context.getCursor();
                nEnd = context.getCursor();
            }
        } else { // in-progress state
            if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()
                    && ChnNumberChars.contains(context.getCurrentChar())) {
                // Extend the end position of the numeral
                nEnd = context.getCursor();
            } else {
                // Emit the numeral
                this.outputNumLexeme(context);
                // Reset the start and end pointers
                nStart = -1;
                nEnd = -1;
            }
        }

        // Buffer exhausted with a numeral still pending
        if (context.isBufferConsumed()) {
            if (nStart != -1 && nEnd != -1) {
                // Emit the numeral
                outputNumLexeme(context);
                // Reset the start and end pointers
                nStart = -1;
                nEnd = -1;
            }
        }
    }

    /**
     * Processes Chinese quantifiers.
     * @param context
     */
    private void processCount(AnalyzeContext context) {
        // Check whether a quantifier scan should start
        if (!this.needCountScan(context)) {
            return;
        }

        if (CharacterUtil.CHAR_CHINESE == context.getCurrentCharType()) {

            // Process the pending hits in countHits first
            if (!this.countHits.isEmpty()) {
                // Process the hit queue
                Hit[] tmpArray = this.countHits.toArray(new Hit[this.countHits.size()]);
                for (Hit hit : tmpArray) {
                    hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor(), hit);
                    if (hit.isMatch()) {
                        // Emit the current word
                        Lexeme newLexeme = new Lexeme(context.getBufferOffset(), hit.getBegin(), context.getCursor() - hit.getBegin() + 1, Lexeme.TYPE_COUNT);
                        context.addLexeme(newLexeme);

                        if (!hit.isPrefix()) { // not a word prefix; the hit cannot match further, remove it
                            this.countHits.remove(hit);
                        }

                    } else if (hit.isUnmatch()) {
                        // The hit is not a word; remove it
                        this.countHits.remove(hit);
                    }
                }
            }

            //*********************************
            // Single-character match at the current cursor position
            Hit singleCharHit = Dictionary.getSingleton().matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
            if (singleCharHit.isMatch()) { // the character is itself a quantifier word
                // Emit the current word
                Lexeme newLexeme = new Lexeme(context.getBufferOffset(), context.getCursor(), 1, Lexeme.TYPE_COUNT);
                context.addLexeme(newLexeme);

                // It is also a word prefix
                if (singleCharHit.isPrefix()) {
                    // Prefix matches go into the hit list
                    this.countHits.add(singleCharHit);
                }
            } else if (singleCharHit.isPrefix()) { // the character is a quantifier prefix
                // Prefix matches go into the hit list
                this.countHits.add(singleCharHit);
            }

        } else {
            // The input is not a Chinese character:
            // discard the unfinished quantifiers
            this.countHits.clear();
        }

        // Buffer exhausted with quantifiers still pending:
        // discard the unfinished quantifiers
        if (context.isBufferConsumed()) {
            this.countHits.clear();
        }
    }

    /**
     * Checks whether a quantifier scan is needed.
     */
    private boolean needCountScan(AnalyzeContext context) {
        if ((nStart != -1 && nEnd != -1) || !countHits.isEmpty()) {
            // Currently processing a Chinese numeral or a quantifier
            return true;
        } else {
            // Look for an adjacent numeral
            if (!context.getOrgLexemes().isEmpty()) {
                Lexeme l = context.getOrgLexemes().peekLast();
                if (Lexeme.TYPE_CNUM == l.getLexemeType() || Lexeme.TYPE_ARABIC == l.getLexemeType()) {
                    if (l.getBegin() + l.getLength() == context.getCursor()) {
                        return true;
                    }
                }
            }
        }
        return false;
    }
    /**
     * Emits the numeral lexeme to the result set.
     * @param context
     */
    private void outputNumLexeme(AnalyzeContext context) {
        if (nStart > -1 && nEnd > -1) {
            // Emit the numeral
            Lexeme newLexeme = new Lexeme(context.getBufferOffset(), nStart, nEnd - nStart + 1, Lexeme.TYPE_CNUM);
            context.addLexeme(newLexeme);
        }
    }

}
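Note: the segmenter's effect can be observed by running IKSegmenter (whose constructor is used the same way in SWMCQueryBuilder above) over a numeral-plus-quantifier phrase. A hedged sketch; the exact lexemes depend on the dictionaries and on smart-mode merging, which may combine numeral and quantifier into a single TYPE_CQUAN lexeme:

    import java.io.StringReader;
    import org.wltea.analyzer.core.IKSegmenter;
    import org.wltea.analyzer.core.Lexeme;

    public class QuantifierSketch {
        public static void main(String[] args) throws Exception {
            IKSegmenter seg = new IKSegmenter(new StringReader("三十个人"), true);
            for (Lexeme l = seg.next(); l != null; l = seg.next()) {
                // Expect roughly a TYPE_CNUM lexeme for "三十" and a TYPE_COUNT
                // lexeme for "个", possibly merged into TYPE_CQUAN in smart mode.
                System.out.println(l);
            }
        }
    }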
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/LexemePath.java:
--------------------------------------------------------------------------------
/*
 * IK Analyzer release 5.0. Apache License, Version 2.0 (see the header in
 * SWMCQueryBuilder.java above). Source code provided by Lin Liangyi
 * (linliangyi2005@gmail.com); copyright 2012, Oolong Studio.
 */
package org.wltea.analyzer.core;

/**
 * Lexeme chain (a candidate segmentation path)
 */
class LexemePath extends QuickSortSet implements Comparable<LexemePath> {

    // Start position of the path
    private int pathBegin;
    // End position of the path
    private int pathEnd;
    // Effective character length covered by the chain
    private int payloadLength;

    LexemePath() {
        this.pathBegin = -1;
        this.pathEnd = -1;
        this.payloadLength = 0;
    }

    /**
     * Appends a lexeme that crosses (overlaps) this path.
     * @param lexeme
     */
    boolean addCrossLexeme(Lexeme lexeme) {
        if (this.isEmpty()) {
            this.addLexeme(lexeme);
            this.pathBegin = lexeme.getBegin();
            this.pathEnd = lexeme.getBegin() + lexeme.getLength();
            this.payloadLength += lexeme.getLength();
            return true;

        } else if (this.checkCross(lexeme)) {
            this.addLexeme(lexeme);
            if (lexeme.getBegin() + lexeme.getLength() > this.pathEnd) {
                this.pathEnd = lexeme.getBegin() + lexeme.getLength();
            }
            this.payloadLength = this.pathEnd - this.pathBegin;
            return true;

        } else {
            return false;
        }
    }

    /**
     * Appends a lexeme that does not cross this path.
     * @param lexeme
     */
    boolean addNotCrossLexeme(Lexeme lexeme) {
        if (this.isEmpty()) {
            this.addLexeme(lexeme);
            this.pathBegin = lexeme.getBegin();
            this.pathEnd = lexeme.getBegin() + lexeme.getLength();
            this.payloadLength += lexeme.getLength();
            return true;

        } else if (this.checkCross(lexeme)) {
            return false;

        } else {
            this.addLexeme(lexeme);
            this.payloadLength += lexeme.getLength();
            Lexeme head = this.peekFirst();
            this.pathBegin = head.getBegin();
            Lexeme tail = this.peekLast();
            this.pathEnd = tail.getBegin() + tail.getLength();
            return true;
        }
    }

    /**
     * Removes and returns the tail lexeme.
     */
    Lexeme removeTail() {
        Lexeme tail = this.pollLast();
        if (this.isEmpty()) {
            this.pathBegin = -1;
            this.pathEnd = -1;
            this.payloadLength = 0;
        } else {
            this.payloadLength -= tail.getLength();
            Lexeme newTail = this.peekLast();
            this.pathEnd = newTail.getBegin() + newTail.getLength();
        }
        return tail;
    }

    /**
     * Checks whether a lexeme's position crosses this path (an ambiguous cut).
     * @param lexeme
     */
    boolean checkCross(Lexeme lexeme) {
        return (lexeme.getBegin() >= this.pathBegin && lexeme.getBegin() < this.pathEnd)
                || (this.pathBegin >= lexeme.getBegin() && this.pathBegin < lexeme.getBegin() + lexeme.getLength());
    }

    int getPathBegin() {
        return pathBegin;
    }

    int getPathEnd() {
        return pathEnd;
    }

    /**
     * Returns the effective word length of the path.
     */
    int getPayloadLength() {
        return this.payloadLength;
    }

    /**
     * Returns the span length of the path.
     */
    int getPathLength() {
        return this.pathEnd - this.pathBegin;
    }

    /**
     * X weight (product of the lexeme lengths).
     */
    int getXWeight() {
        int product = 1;
        Cell c = this.getHead();
        while (c != null && c.getLexeme() != null) {
            product *= c.getLexeme().getLength();
            c = c.getNext();
        }
        return product;
    }

    /**
     * Positional weight of the lexemes.
     */
    int getPWeight() {
        int pWeight = 0;
        int p = 0;
        Cell c = this.getHead();
        while (c != null && c.getLexeme() != null) {
            p++;
            pWeight += p * c.getLexeme().getLength();
            c = c.getNext();
        }
        return pWeight;
    }

    LexemePath copy() {
        LexemePath theCopy = new LexemePath();
        theCopy.pathBegin = this.pathBegin;
        theCopy.pathEnd = this.pathEnd;
        theCopy.payloadLength = this.payloadLength;
        Cell c = this.getHead();
        while (c != null && c.getLexeme() != null) {
            theCopy.addLexeme(c.getLexeme());
            c = c.getNext();
        }
        return theCopy;
    }

    public int compareTo(LexemePath o) {
        // Longer effective text length wins
        if (this.payloadLength > o.payloadLength) {
            return -1;
        } else if (this.payloadLength < o.payloadLength) {
            return 1;
        } else {
            // Fewer lexemes is better
            if (this.size() < o.size()) {
                return -1;
            } else if (this.size() > o.size()) {
                return 1;
            } else {
                // A larger path span is better
                if (this.getPathLength() > o.getPathLength()) {
                    return -1;
                } else if (this.getPathLength() < o.getPathLength()) {
                    return 1;
                } else {
                    // Statistically, reverse segmentation beats forward segmentation,
                    // so the path that ends later takes priority
                    if (this.pathEnd > o.pathEnd) {
                        return -1;
                    } else if (pathEnd < o.pathEnd) {
                        return 1;
                    } else {
                        // More evenly sized lexemes are better
                        if (this.getXWeight() > o.getXWeight()) {
                            return -1;
                        } else if (this.getXWeight() < o.getXWeight()) {
                            return 1;
                        } else {
                            // Compare positional weight
                            if (this.getPWeight() > o.getPWeight()) {
                                return -1;
                            } else if (this.getPWeight() < o.getPWeight()) {
                                return 1;
                            }
                        }
                    }
                }
            }
        }
        return 0;
    }

    public String toString() {
        StringBuffer sb = new StringBuffer();
        sb.append("pathBegin  : ").append(pathBegin).append("\r\n");
        sb.append("pathEnd  : ").append(pathEnd).append("\r\n");
        sb.append("payloadLength  : ").append(payloadLength).append("\r\n");
        Cell head = this.getHead();
        while (head != null) {
            sb.append("lexeme : ").append(head.getLexeme()).append("\r\n");
            head = head.getNext();
        }
        return sb.toString();
    }

}
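Note: a minimal sketch of how compareTo arbitrates between two ambiguous paths over the same four-character span. It is hypothetical, assumes compilation inside org.wltea.analyzer.core (LexemePath is package-private), and the positions are illustrative:

    package org.wltea.analyzer.core;

    public class LexemePathSketch {
        public static void main(String[] args) {
            // Path A: two two-char lexemes covering chars 0-1 and 2-3.
            LexemePath a = new LexemePath();
            a.addNotCrossLexeme(new Lexeme(0, 0, 2, Lexeme.TYPE_CNWORD));
            a.addNotCrossLexeme(new Lexeme(0, 2, 2, Lexeme.TYPE_CNWORD));

            // Path B: a three-char lexeme at 0-2 plus a single char at 3.
            LexemePath b = new LexemePath();
            b.addNotCrossLexeme(new Lexeme(0, 0, 3, Lexeme.TYPE_CNWORD));
            b.addNotCrossLexeme(new Lexeme(0, 3, 1, Lexeme.TYPE_CNCHAR));

            // Payload (4), lexeme count (2), span (4), and path end (4) all tie;
            // the X-weight rule prefers the more even 2*2=4 over 3*1=3.
            System.out.println(a.compareTo(b)); // -1: path A wins
        }
    }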
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/Lexeme.java:
--------------------------------------------------------------------------------
/*
 * IK Analyzer release 5.0. Apache License, Version 2.0 (see the header in
 * SWMCQueryBuilder.java above). Source code provided by Lin Liangyi
 * (linliangyi2005@gmail.com); copyright 2012, Oolong Studio.
 */
package org.wltea.analyzer.core;

/**
 * IK lexeme object
 */
public class Lexeme implements Comparable<Lexeme> {
    // lexemeType constants
    // Unknown
    public static final int TYPE_UNKNOWN = 0;
    // English
    public static final int TYPE_ENGLISH = 1;
    // Number
    public static final int TYPE_ARABIC = 2;
    // Mixed English letters and digits
    public static final int TYPE_LETTER = 3;
    // Chinese word
    public static final int TYPE_CNWORD = 4;
    // Single Chinese character
    public static final int TYPE_CNCHAR = 64;
    // Japanese/Korean characters
    public static final int TYPE_OTHER_CJK = 8;
    // Chinese numeral
    public static final int TYPE_CNUM = 16;
    // Chinese quantifier
    public static final int TYPE_COUNT = 32;
    // Chinese numeral plus quantifier
    public static final int TYPE_CQUAN = 48;

    // Buffer offset of the lexeme
    private int offset;
    // Start position of the lexeme relative to the buffer
    private int begin;
    // Length of the lexeme
    private int length;
    // Lexeme text
    private String lexemeText;
    // Lexeme type
    private int lexemeType;

    public Lexeme(int offset, int begin, int length, int lexemeType) {
        this.offset = offset;
        this.begin = begin;
        if (length < 0) {
            throw new IllegalArgumentException("length < 0");
        }
        this.length = length;
        this.lexemeType = lexemeType;
    }

    /*
     * Lexeme equality: same buffer offset, same start position, same end position.
     * @see java.lang.Object#equals(Object o)
     */
    public boolean equals(Object o) {
        if (o == null) {
            return false;
        }

        if (this == o) {
            return true;
        }

        if (o instanceof Lexeme) {
            Lexeme other = (Lexeme) o;
            if (this.offset == other.getOffset()
                    && this.begin == other.getBegin()
                    && this.length == other.getLength()) {
                return true;
            } else {
                return false;
            }
        } else {
            return false;
        }
    }

    /*
     * Lexeme hash code.
     * @see java.lang.Object#hashCode()
     */
    public int hashCode() {
        int absBegin = getBeginPosition();
        int absEnd = getEndPosition();
        return (absBegin * 37) + (absEnd * 31) + ((absBegin * absEnd) % getLength()) * 11;
    }

    /*
     * Ordering of lexemes in sorted sets.
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    public int compareTo(Lexeme other) {
        // Earlier start position first
        if (this.begin < other.getBegin()) {
            return -1;
        } else if (this.begin == other.getBegin()) {
            // Longer lexeme first
            if (this.length > other.getLength()) {
                return -1;
            } else if (this.length == other.getLength()) {
                return 0;
            } else { // this.length < other.getLength()
                return 1;
            }

        } else { // this.begin > other.getBegin()
            return 1;
        }
    }

    public int getOffset() {
        return offset;
    }

    public void setOffset(int offset) {
        this.offset = offset;
    }

    public int getBegin() {
        return begin;
    }

    /**
     * Returns the start position of the lexeme in the source text.
     * @return int
     */
    public int getBeginPosition() {
        return offset + begin;
    }

    public void setBegin(int begin) {
        this.begin = begin;
    }

    /**
     * Returns the end position of the lexeme in the source text.
     * @return int
     */
    public int getEndPosition() {
        return offset + begin + length;
    }

    /**
     * Returns the character length of the lexeme.
     * @return int
     */
    public int getLength() {
        return this.length;
    }

    public void setLength(int length) {
        // The original checked this.length (the old field value) instead of the
        // argument, so invalid input was never rejected; fixed here.
        if (length < 0) {
            throw new IllegalArgumentException("length < 0");
        }
        this.length = length;
    }

    /**
     * Returns the text of the lexeme.
     * @return String
     */
    public String getLexemeText() {
        if (lexemeText == null) {
            return "";
        }
        return lexemeText;
    }

    public void setLexemeText(String lexemeText) {
        if (lexemeText == null) {
            this.lexemeText = "";
            this.length = 0;
        } else {
            this.lexemeText = lexemeText;
            this.length = lexemeText.length();
        }
    }

    /**
     * Returns the lexeme type.
     * @return int
     */
    public int getLexemeType() {
        return lexemeType;
    }

    /**
     * Returns a display string for the lexeme type.
     * @return String
     */
    public String getLexemeTypeString() {
        switch (lexemeType) {

        case TYPE_ENGLISH:
            return "ENGLISH";

        case TYPE_ARABIC:
            return "ARABIC";

        case TYPE_LETTER:
            return "LETTER";

        case TYPE_CNWORD:
            return "CN_WORD";

        case TYPE_CNCHAR:
            return "CN_CHAR";

        case TYPE_OTHER_CJK:
            return "OTHER_CJK";

        case TYPE_COUNT:
            return "COUNT";

        case TYPE_CNUM:
            return "TYPE_CNUM";

        case TYPE_CQUAN:
            return "TYPE_CQUAN";

        default:
            return "UNKNOWN"; // was misspelled "UNKONW"
        }
    }

    public void setLexemeType(int lexemeType) {
        this.lexemeType = lexemeType;
    }

    /**
     * Merges an adjacent lexeme into this one.
     * @param l
     * @param lexemeType
     * @return boolean whether the merge succeeded
     */
    public boolean append(Lexeme l, int lexemeType) {
        if (l != null && this.getEndPosition() == l.getBeginPosition()) {
            this.length += l.getLength();
            this.lexemeType = lexemeType;
            return true;
        } else {
            return false;
        }
    }

    public String toString() {
        StringBuffer strbuf = new StringBuffer();
        strbuf.append(this.getBeginPosition()).append("-").append(this.getEndPosition());
        strbuf.append(" : ").append(this.lexemeText).append(" : \t");
        strbuf.append(this.getLexemeTypeString());
        return strbuf.toString();
    }

}
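Note: Lexeme is public, so the merge rule in append can be shown directly. A minimal sketch; the positions are illustrative:

    import org.wltea.analyzer.core.Lexeme;

    public class LexemeAppendSketch {
        public static void main(String[] args) {
            // "windows" at positions 0-6 and "2000" at 7-10: end meets begin, so they merge.
            Lexeme word = new Lexeme(0, 0, 7, Lexeme.TYPE_ENGLISH);
            Lexeme num  = new Lexeme(0, 7, 4, Lexeme.TYPE_ARABIC);
            boolean merged = word.append(num, Lexeme.TYPE_LETTER);
            System.out.println(merged);                     // true
            System.out.println(word.getLength());           // 11
            System.out.println(word.getLexemeTypeString()); // LETTER
        }
    }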
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/main/resources/mybatis/mybatis-3-mapper.dtd:
--------------------------------------------------------------------------------
[Content not preserved in this dump; the file is the standard MyBatis 3 mapper DTD.]
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/core/LetterSegmenter.java:
--------------------------------------------------------------------------------
/*
 * IK Analyzer release 5.0. Apache License, Version 2.0 (see the header in
 * SWMCQueryBuilder.java above). Source code provided by Lin Liangyi
 * (linliangyi2005@gmail.com); copyright 2012, Oolong Studio.
 */
package org.wltea.analyzer.core;

import java.util.Arrays;

/**
 * Sub-segmenter for English letters and Arabic digits
 */
class LetterSegmenter implements ISegmenter {

    // Sub-segmenter label
    static final String SEGMENTER_NAME = "LETTER_SEGMENTER";
    // Connector characters for mixed tokens
    private static final char[] Letter_Connector = new char[] { '#', '&', '+', '-', '.', '@', '_' };
    // Connector characters for numbers
    private static final char[] Num_Connector = new char[] { ',', '.' };

    /*
     * Start position of the lexeme; doubles as the segmenter state flag:
     * start > -1 means this segmenter is currently consuming characters.
     */
    private int start;
    /*
     * End position of the lexeme: the position of the last letter or digit
     * in the lexeme that is not a connector character.
     */
    private int end;

    /*
     * Start position of the English run
     */
    private int englishStart;

    /*
     * End position of the English run
     */
    private int englishEnd;

    /*
     * Start position of the Arabic-digit run
     */
    private int arabicStart;

    /*
     * End position of the Arabic-digit run
     */
    private int arabicEnd;

    LetterSegmenter() {
        Arrays.sort(Letter_Connector);
        Arrays.sort(Num_Connector);
        this.start = -1;
        this.end = -1;
        this.englishStart = -1;
        this.englishEnd = -1;
        this.arabicStart = -1;
        this.arabicEnd = -1;
    }

    /* (non-Javadoc)
     * @see org.wltea.analyzer.core.ISegmenter#analyze(org.wltea.analyzer.core.AnalyzeContext)
     */
    public void analyze(AnalyzeContext context) {
        boolean bufferLockFlag = false;
        // Process English letters
        bufferLockFlag = this.processEnglishLetter(context) || bufferLockFlag;
        // Process Arabic digits
        bufferLockFlag = this.processArabicLetter(context) || bufferLockFlag;
        // Process mixed tokens last, so QuickSortSet can weed out duplicates
        bufferLockFlag = this.processMixLetter(context) || bufferLockFlag;

        // Decide whether the buffer must stay locked
        if (bufferLockFlag) {
            context.lockBuffer(SEGMENTER_NAME);
        } else {
            // Unlock the buffer
            context.unlockBuffer(SEGMENTER_NAME);
        }
    }

    /* (non-Javadoc)
     * @see org.wltea.analyzer.core.ISegmenter#reset()
     */
    public void reset() {
        this.start = -1;
        this.end = -1;
        this.englishStart = -1;
        this.englishEnd = -1;
        this.arabicStart = -1;
        this.arabicEnd = -1;
    }

    /**
     * Emits mixed letter/digit tokens,
     * e.g. windows2000 or linliangyi2005@gmail.com
     * @param context
     */
    private boolean processMixLetter(AnalyzeContext context) {
        boolean needLock = false;

        if (this.start == -1) { // this segmenter is not processing characters yet
            if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()
                    || CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
                // Record the start pointer; the segmenter enters the processing state
                this.start = context.getCursor();
                this.end = start;
            }

        } else { // the segmenter is processing characters
            if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()
                    || CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
                // Record a possible end position
                this.end = context.getCursor();

            } else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType()
                    && this.isLetterConnector(context.getCurrentChar())) {
                // Record a possible end position
                this.end = context.getCursor();
            } else {
                // Hit a non-letter character: emit the lexeme
                Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.start, this.end - this.start + 1, Lexeme.TYPE_LETTER);
                context.addLexeme(newLexeme);
                this.start = -1;
                this.end = -1;
            }
        }

        // Check whether the buffer is exhausted
        if (context.isBufferConsumed()) {
            if (this.start != -1 && this.end != -1) {
                // Buffer exhausted: emit the lexeme
                Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.start, this.end - this.start + 1, Lexeme.TYPE_LETTER);
                context.addLexeme(newLexeme);
                this.start = -1;
                this.end = -1;
            }
        }

        // Decide whether the buffer must stay locked
        if (this.start == -1 && this.end == -1) {
            // Unlock the buffer
            needLock = false;
        } else {
            needLock = true;
        }
        return needLock;
    }

    /**
     * Emits runs of pure English letters.
     * @param context
     */
    private boolean processEnglishLetter(AnalyzeContext context) {
        boolean needLock = false;

        if (this.englishStart == -1) { // not processing English characters yet
            if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
                // Record the start pointer; the segmenter enters the processing state
                this.englishStart = context.getCursor();
                this.englishEnd = this.englishStart;
            }
        } else { // currently processing English characters
            if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
                // Record the current cursor as the end position
                this.englishEnd = context.getCursor();
            } else {
                // Hit a non-English character: emit the lexeme
                Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.englishStart, this.englishEnd - this.englishStart + 1, Lexeme.TYPE_ENGLISH);
                context.addLexeme(newLexeme);
                this.englishStart = -1;
                this.englishEnd = -1;
            }
        }

        // Check whether the buffer is exhausted
        if (context.isBufferConsumed()) {
            if (this.englishStart != -1 && this.englishEnd != -1) {
                // Buffer exhausted: emit the lexeme
                Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.englishStart, this.englishEnd - this.englishStart + 1, Lexeme.TYPE_ENGLISH);
                context.addLexeme(newLexeme);
                this.englishStart = -1;
                this.englishEnd = -1;
            }
        }

        // Decide whether the buffer must stay locked
        if (this.englishStart == -1 && this.englishEnd == -1) {
            // Unlock the buffer
            needLock = false;
        } else {
            needLock = true;
        }
        return needLock;
    }

    /**
     * Emits runs of Arabic digits.
     * @param context
     */
    private boolean processArabicLetter(AnalyzeContext context) {
        boolean needLock = false;

        if (this.arabicStart == -1) { // not processing digit characters yet
            if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) {
                // Record the start pointer; the segmenter enters the processing state
                this.arabicStart = context.getCursor();
                this.arabicEnd = this.arabicStart;
            }
        } else { // currently processing digit characters
            if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) {
                // Record the current cursor as the end position
                this.arabicEnd = context.getCursor();
            } else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType()
                    && this.isNumConnector(context.getCurrentChar())) {
                // Do not emit the number, but do not mark the end either
            } else {
                // Hit a non-Arabic character: emit the lexeme
                Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.arabicStart, this.arabicEnd - this.arabicStart + 1, Lexeme.TYPE_ARABIC);
                context.addLexeme(newLexeme);
                this.arabicStart = -1;
                this.arabicEnd = -1;
            }
        }

        // Check whether the buffer is exhausted
        if (context.isBufferConsumed()) {
            if (this.arabicStart != -1 && this.arabicEnd != -1) {
                // Emit the segmented lexeme
                Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.arabicStart, this.arabicEnd - this.arabicStart + 1, Lexeme.TYPE_ARABIC);
                context.addLexeme(newLexeme);
                this.arabicStart = -1;
                this.arabicEnd = -1;
            }
        }

        // Decide whether the buffer must stay locked
        if (this.arabicStart == -1 && this.arabicEnd == -1) {
            // Unlock the buffer
            needLock = false;
        } else {
            needLock = true;
        }
        return needLock;
    }

    /**
     * Checks whether the character is a letter connector.
     * @param input
     */
    private boolean isLetterConnector(char input) {
        int index = Arrays.binarySearch(Letter_Connector, input);
        return index >= 0;
    }

    /**
     * Checks whether the character is a number connector.
     * @param input
     */
    private boolean isNumConnector(char input) {
        int index = Arrays.binarySearch(Num_Connector, input);
        return index >= 0;
    }
}
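Note: the effect of the three passes can be observed by running IKSegmenter over a mixed token. A hedged sketch; the exact lexemes and their order depend on dictionary and mode settings:

    import java.io.StringReader;
    import org.wltea.analyzer.core.IKSegmenter;
    import org.wltea.analyzer.core.Lexeme;

    public class LetterSketch {
        public static void main(String[] args) throws Exception {
            // Non-smart mode keeps all candidate lexemes.
            IKSegmenter seg = new IKSegmenter(new StringReader("windows2000"), false);
            for (Lexeme l = seg.next(); l != null; l = seg.next()) {
                // Expect, in some order: windows (ENGLISH), 2000 (ARABIC),
                // and the merged windows2000 (LETTER).
                System.out.println(l.getLexemeText() + " : " + l.getLexemeTypeString());
            }
        }
    }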
--------------------------------------------------------------------------------
/xultimate-ikanalyzer/src/main/java/org/wltea/analyzer/dic/DictSegment.java:
--------------------------------------------------------------------------------
/*
 * IK Analyzer release 5.0. Apache License, Version 2.0 (see the header in
 * SWMCQueryBuilder.java above). Source code provided by Lin Liangyi
 * (linliangyi2005@gmail.com); copyright 2012, Oolong Studio.
 */
package org.wltea.analyzer.dic;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

/**
 * Dictionary segment: one branch of the dictionary trie
 */
class DictSegment implements Comparable<DictSegment> {

    // Shared character table storing the Chinese characters
    private static final Map<Character, Character> charMap = new HashMap<Character, Character>(16, 0.95f);
    // Upper bound for the array storage
    private static final int ARRAY_LENGTH_LIMIT = 3;

    // Map storage for the children
    private Map<Character, DictSegment> childrenMap;
    // Array storage for the children
    private DictSegment[] childrenArray;

    // Character stored on this node
    private Character nodeChar;
    // Number of child segments on this node:
    // storeSize <= ARRAY_LENGTH_LIMIT uses the array; storeSize > ARRAY_LENGTH_LIMIT uses the map
    private int storeSize = 0;
    // Node state: 0 by default; 1 means the path from the root to this node spells a word
    private int nodeState = 0;

    DictSegment(Character nodeChar) {
        if (nodeChar == null) {
            throw new IllegalArgumentException("The node character must not be null");
        }
        this.nodeChar = nodeChar;
    }

    Character getNodeChar() {
        return nodeChar;
    }

    /*
     * Checks whether this node has children.
     */
    boolean hasNextNode() {
        return this.storeSize > 0;
    }

    /**
     * Matches a word segment.
     * @param charArray
     * @return Hit
     */
    Hit match(char[] charArray) {
        return this.match(charArray, 0, charArray.length, null);
    }

    /**
     * Matches a word segment.
     * @param charArray
     * @param begin
     * @param length
     * @return Hit
     */
    Hit match(char[] charArray, int begin, int length) {
        return this.match(charArray, begin, length, null);
    }

    /**
     * Matches a word segment.
     * @param charArray
     * @param begin
     * @param length
     * @param searchHit
     * @return Hit
     */
    Hit match(char[] charArray, int begin, int length, Hit searchHit) {

        if (searchHit == null) {
            // No hit yet: create one and set its start position
            searchHit = new Hit();
            searchHit.setBegin(begin);
        } else {
            // Otherwise reset the hit state
            searchHit.setUnmatch();
        }
        // Set the hit's current processing position
        searchHit.setEnd(begin);

        Character keyChar = new Character(charArray[begin]);
        DictSegment ds = null;
		// copy the instance fields into locals, to avoid synchronization problems
		// when a lookup races an update
		DictSegment[] segmentArray = this.childrenArray;
		Map<Character, DictSegment> segmentMap = this.childrenMap;

		// STEP 1: look up the DictSegment for keyChar on this node
		if(segmentArray != null){
			// search the array
			DictSegment keySegment = new DictSegment(keyChar);
			int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
			if(position >= 0){
				ds = segmentArray[position];
			}

		}else if(segmentMap != null){
			// search the map
			ds = segmentMap.get(keyChar);
		}

		// STEP 2: a DictSegment was found; decide whether to recurse or to return the result
		if(ds != null){
			if(length > 1){
				// the word is not fully matched yet; keep searching downwards
				return ds.match(charArray, begin + 1, length - 1, searchHit);
			}else if (length == 1){

				// this is the last char to search
				if(ds.nodeState == 1){
					// mark the hit as a full match
					searchHit.setMatch();
				}
				if(ds.hasNextNode()){
					// mark the hit as a prefix match
					searchHit.setPrefix();
					// remember the DictSegment at the current position
					searchHit.setMatchedDictSegment(ds);
				}
				return searchHit;
			}

		}
		// STEP 3: no DictSegment was found; the hit stays unmatched
		return searchHit;
	}

	/**
	 * Load a word fragment into the dictionary.
	 * @param charArray
	 */
	void fillSegment(char[] charArray){
		this.fillSegment(charArray, 0, charArray.length, 1);
	}

	/**
	 * Disable one word in the dictionary.
	 * @param charArray
	 */
	void disableSegment(char[] charArray){
		this.fillSegment(charArray, 0, charArray.length, 0);
	}

	/**
	 * Load a word fragment into the dictionary.
	 * @param charArray
	 * @param begin
	 * @param length
	 * @param enabled
	 */
	private synchronized void fillSegment(char[] charArray, int begin, int length, int enabled){
		// fetch the character object from the shared character table
		Character beginChar = Character.valueOf(charArray[begin]);
		Character keyChar = charMap.get(beginChar);
		// the character is not in the table yet: add it
		if(keyChar == null){
			charMap.put(beginChar, beginChar);
			keyChar = beginChar;
		}

		// search this node's storage for the segment matching keyChar, creating it if absent
		DictSegment ds = lookforSegment(keyChar, enabled);
		if(ds != null){
			// process the segment for keyChar
			if(length > 1){
				// the word is not fully inserted into the trie yet
				ds.fillSegment(charArray, begin + 1, length - 1, enabled);
			}else if (length == 1){
				// this is the word's last char: set this node's state to enabled;
				// enabled=1 marks a complete word, enabled=0 disables the word in the dictionary
				ds.nodeState = enabled;
			}
		}

	}

	/**
	 * Find the child segment for keyChar under this node.
	 * @param keyChar
	 * @param create =1: create a new segment if none is found; =0: do not create, return null
	 * @return
	 */
	private DictSegment lookforSegment(Character keyChar, int create){

		DictSegment ds = null;

		if(this.storeSize <= ARRAY_LENGTH_LIMIT){
			// get the array container, creating it on first use
			DictSegment[] segmentArray = getChildrenArray();
			// search the array
			DictSegment keySegment = new DictSegment(keyChar);
			int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
			if(position >= 0){
				ds = segmentArray[position];
			}

			// no matching segment found in the array
			if(ds == null && create == 1){
				ds = keySegment;
				if(this.storeSize < ARRAY_LENGTH_LIMIT){
					// the array still has room: keep using array storage
					segmentArray[this.storeSize] = ds;
					// one more segment
					this.storeSize++;
					Arrays.sort(segmentArray, 0, this.storeSize);

				}else{
					// the array is full: switch to map storage
					// get the map container, creating it on first use
					Map<Character, DictSegment> segmentMap = getChildrenMap();
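					// this whole array-to-map switch runs under fillSegment()'s lock
					// (lookforSegment is only called from there), so there is a single
					// writer; the write ordering below protects the lock-free match()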
					// migrate the existing segments from the array into the map
					migrate(segmentArray, segmentMap);
					// store the new segment
					segmentMap.put(keyChar, ds);
					// increment the segment count; storeSize++ must happen before the array
					// reference is released, so a reader can never observe an empty container
					this.storeSize++;
					// release the array reference
					this.childrenArray = null;
				}

			}

		}else{
			// get the map container, creating it on first use
			Map<Character, DictSegment> segmentMap = getChildrenMap();
			// search the map
			ds = segmentMap.get(keyChar);
			if(ds == null && create == 1){
				// build a new segment
				ds = new DictSegment(keyChar);
				segmentMap.put(keyChar, ds);
				// one more segment on this node
				this.storeSize++;
			}
		}

		return ds;
	}


	/**
	 * Get the array container.
	 * Thread-safe lazy initialization.
	 */
	private DictSegment[] getChildrenArray(){
		if(this.childrenArray == null){
			synchronized(this){
				if(this.childrenArray == null){
					this.childrenArray = new DictSegment[ARRAY_LENGTH_LIMIT];
				}
			}
		}
		return this.childrenArray;
	}

	/**
	 * Get the map container.
	 * Thread-safe lazy initialization.
	 */
	private Map<Character, DictSegment> getChildrenMap(){
		if(this.childrenMap == null){
			synchronized(this){
				if(this.childrenMap == null){
					this.childrenMap = new HashMap<Character, DictSegment>(ARRAY_LENGTH_LIMIT * 2, 0.8f);
				}
			}
		}
		return this.childrenMap;
	}

	/**
	 * Migrate the segments stored in the array into the map.
	 * @param segmentArray
	 * @param segmentMap
	 */
	private void migrate(DictSegment[] segmentArray, Map<Character, DictSegment> segmentMap){
		for(DictSegment segment : segmentArray){
			if(segment != null){
				segmentMap.put(segment.nodeChar, segment);
			}
		}
	}

	/**
	 * Implement the Comparable interface.
	 * @param o
	 * @return int
	 */
	public int compareTo(DictSegment o) {
		// compare the characters stored on the two nodes
		return this.nodeChar.compareTo(o.nodeChar);
	}

}
--------------------------------------------------------------------------------
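The interplay of the full-match and prefix-match states is the heart of DictSegment. A minimal sketch of how a caller would observe them follows; DictSegmentDemo is a hypothetical class and must live in org.wltea.analyzer.dic (DictSegment and Hit are package-private), and Hit.isMatch()/Hit.isPrefix() are assumed from Hit.java, which this listing does not reproduce:

package org.wltea.analyzer.dic;

// Hypothetical demo exercising the trie directly (not part of the project).
public class DictSegmentDemo {
    public static void main(String[] args) {
        // an artificial root node; (char) 0 is just a placeholder character
        DictSegment root = new DictSegment(Character.valueOf((char) 0));
        root.fillSegment("中国".toCharArray());
        root.fillSegment("中国人".toCharArray());

        // "中国" is both a complete word and the prefix of "中国人"
        Hit hit = root.match("中国".toCharArray());
        System.out.println(hit.isMatch());   // expected: true
        System.out.println(hit.isPrefix());  // expected: true

        // "中" alone is only a prefix, not a stored word
        hit = root.match("中".toCharArray());
        System.out.println(hit.isMatch());   // expected: false
        System.out.println(hit.isPrefix());  // expected: true
    }
}

A hit that is both a match and a prefix is what lets the CJK segmenter emit a word immediately while continuing to extend it toward a longer dictionary entry.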