├── .github
└── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── enhancement_or_feature_request.md
├── .gitignore
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── LICENSE.txt
├── README.md
├── assets
└── banner.png
├── elasticsearch
├── pom.xml
└── src
│ └── main
│ ├── assemblies
│ └── plugin.xml
│ ├── java
│ └── com
│ │ └── infinilabs
│ │ └── elasticsearch
│ │ └── analysis
│ │ ├── AnalysisPinyinPlugin.java
│ │ ├── ESPinyinConfig.java
│ │ ├── PinyinAbbreviationsTokenizerFactory.java
│ │ ├── PinyinAnalyzerProvider.java
│ │ ├── PinyinTokenFilterFactory.java
│ │ └── PinyinTokenizerFactory.java
│ └── resources
│ └── plugin-descriptor.properties
├── opensearch
├── pom.xml
└── src
│ └── main
│ ├── assemblies
│ └── plugin.xml
│ ├── java
│ └── com
│ │ └── infinilabs
│ │ └── opensearch
│ │ └── analysis
│ │ ├── AnalysisPinyinPlugin.java
│ │ ├── ESPinyinConfig.java
│ │ ├── PinyinAbbreviationsTokenizerFactory.java
│ │ ├── PinyinAnalyzerProvider.java
│ │ ├── PinyinTokenFilterFactory.java
│ │ └── PinyinTokenizerFactory.java
│ └── resources
│ └── plugin-descriptor.properties
├── pinyin-core
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ ├── com
│ │ │ └── infinilabs
│ │ │ │ └── pinyin
│ │ │ │ └── analysis
│ │ │ │ ├── ChineseUtil.java
│ │ │ │ ├── ConfigErrorException.java
│ │ │ │ ├── PinyinAlphabetTokenizer.java
│ │ │ │ ├── PinyinAnalyzer.java
│ │ │ │ ├── PinyinConfig.java
│ │ │ │ ├── PinyinTokenFilter.java
│ │ │ │ ├── PinyinTokenizer.java
│ │ │ │ └── TermItem.java
│ │ └── org
│ │ │ └── nlpcn
│ │ │ └── commons
│ │ │ └── lang
│ │ │ ├── pinyin
│ │ │ ├── CaseType.java
│ │ │ ├── Pinyin.java
│ │ │ ├── PinyinFormat.java
│ │ │ ├── PinyinFormatter.java
│ │ │ ├── PinyinUtil.java
│ │ │ ├── PinyinWord.java
│ │ │ ├── ToneType.java
│ │ │ └── YuCharType.java
│ │ │ ├── tire
│ │ │ ├── GetWord.java
│ │ │ ├── SmartGetWord.java
│ │ │ ├── domain
│ │ │ │ ├── Forest.java
│ │ │ │ ├── SmartForest.java
│ │ │ │ └── Value.java
│ │ │ └── library
│ │ │ │ └── Library.java
│ │ │ └── util
│ │ │ ├── AnsjArrays.java
│ │ │ ├── CollectionUtil.java
│ │ │ ├── FileFinder.java
│ │ │ ├── FileIterator.java
│ │ │ ├── IOUtil.java
│ │ │ ├── MD5.java
│ │ │ ├── MapCount.java
│ │ │ ├── MapFactory.java
│ │ │ ├── MurmurHash.java
│ │ │ ├── ObjConver.java
│ │ │ ├── StringUtil.java
│ │ │ ├── WordAlert.java
│ │ │ ├── WordWeight.java
│ │ │ ├── logging
│ │ │ ├── JakartaCommonsLoggingImpl.java
│ │ │ ├── Jdk14LoggingImpl.java
│ │ │ ├── Log.java
│ │ │ ├── Log4j2Impl.java
│ │ │ ├── Log4jImpl.java
│ │ │ ├── LogFactory.java
│ │ │ ├── NoLoggingImpl.java
│ │ │ ├── Resources.java
│ │ │ └── SLF4JImpl.java
│ │ │ └── tuples
│ │ │ ├── Decade.java
│ │ │ ├── Ennead.java
│ │ │ ├── KeyValue.java
│ │ │ ├── LabelValue.java
│ │ │ ├── Octet.java
│ │ │ ├── Pair.java
│ │ │ ├── Quartet.java
│ │ │ ├── Quintet.java
│ │ │ ├── Septet.java
│ │ │ ├── Sextet.java
│ │ │ ├── Triplet.java
│ │ │ ├── Tuple.java
│ │ │ ├── Unit.java
│ │ │ └── valueintf
│ │ │ ├── IValue0.java
│ │ │ ├── IValue1.java
│ │ │ ├── IValue2.java
│ │ │ ├── IValue3.java
│ │ │ ├── IValue4.java
│ │ │ ├── IValue5.java
│ │ │ ├── IValue6.java
│ │ │ ├── IValue7.java
│ │ │ ├── IValue8.java
│ │ │ ├── IValue9.java
│ │ │ ├── IValueKey.java
│ │ │ ├── IValueLabel.java
│ │ │ └── IValueValue.java
│ └── resources
│ │ ├── pinyin.txt
│ │ ├── pinyin_alphabet.dict
│ │ └── polyphone.txt
│ └── test
│ ├── java
│ ├── com
│ │ └── infinilabs
│ │ │ └── pinyin
│ │ │ └── analysis
│ │ │ ├── PinyinAlphabetTokenizerTest.java
│ │ │ └── PinyinAnalysisTest.java
│ └── org
│ │ └── nlpcn
│ │ └── commons
│ │ └── lang
│ │ ├── TestUtils.java
│ │ ├── pinyin
│ │ └── PinyinTest.java
│ │ ├── tire
│ │ └── splitWord
│ │ │ ├── AllWordTest.java
│ │ │ ├── ForestTest.java
│ │ │ ├── GetWordTest.java
│ │ │ ├── LibraryTest.java
│ │ │ └── SmartGetWordTest.java
│ │ └── util
│ │ ├── FileFinderTest.java
│ │ ├── IOUtilTest.java
│ │ ├── StringUtilTest.java
│ │ ├── WordAlertTest.java
│ │ ├── WordWeightTest.java
│ │ └── logging
│ │ └── NLPLoggerTest.java
│ └── resources
│ ├── library.txt
│ ├── log4j.properties
│ ├── test.json
│ └── test_pinyin.dic
└── pom.xml
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a bug report to help fix a problem
4 | ---
5 |
6 | ### Description
7 |
8 | A description of what the bug is.
9 |
10 | ### Steps to reproduce
11 |
12 | 1. First step
13 | 2. Second step
14 | 3. Third step
15 |
16 | Provide your configuration or a code snippet that helps.
17 |
18 | ### Expected behavior
19 |
20 | A description of what you expected to happen.
21 |
22 | ### Actual behavior
23 |
24 | A description of what happens instead.
25 |
26 | ### Environment
27 |
28 | - Versions: [e.g. Elasticsearch 8.0.0]
29 | - Operating system and version: [e.g. macOS 10.14, Windows 10, Ubuntu 18.04]
30 | - [Linux] Desktop Environment and/or Window Manager: [e.g. Gnome, LXDE, i3]
31 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/enhancement_or_feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Enhancement or feature request
3 | about: Suggest an enhancement or feature
4 | ---
5 |
6 | ### Problem description
7 |
8 | A description of a problem, workflow or integration that your suggestion would solve.
9 | If the problem is OS-specific, include that information here.
10 |
11 | ### Preferred solution
12 |
13 | A description of what changes should be made to solve the problem.
14 |
15 | ### Alternatives
16 |
17 | A description of any alternative solutions or enhancements considered.
18 |
19 | ### Additional Information (optional)
20 |
21 | If applicable, add screenshots to help demonstrate the problem or proposed solution.
22 | Code examples or related links are useful, too.
23 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /data
2 | /work
3 | /logs
4 | /.idea
5 | /target
6 | .DS_Store
7 | *.iml
8 | /.project
9 | /.settings
10 | /.classpath
11 | /*.ipr
12 | /*.iws
13 | /*/target
14 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 | jdk:
3 | - oraclejdk8
4 | install: true
5 | script: # NOTE(review): `script` is declared again on line 9; with duplicate keys most YAML parsers keep only the last one, so the two steps below are presumably never run - confirm, then merge or remove
6 | - sudo apt-get update && sudo apt-get install oracle-java8-installer
7 | - java -version
8 | language: java
9 | script: mvn clean package
10 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at contact@infini.ltd. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 |
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 |
--------------------------------------------------------------------------------
/assets/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/infinilabs/analysis-pinyin/2d58347db2db6533bf31bd2d9be9c66b5e2c32a8/assets/banner.png
--------------------------------------------------------------------------------
/elasticsearch/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | analysis-pinyin
7 | com.infinilabs
8 | 1.0
9 |
10 | 4.0.0
11 |
12 | elasticsearch-analysis-pinyin
13 | ${elasticsearch.version}
14 | Pinyin Analysis for Elasticsearch
15 | jar
16 |
17 | 9.0.0
18 | 1.8
19 |
20 | analysis-pinyin
21 | com.infinilabs.elasticsearch.analysis.AnalysisPinyinPlugin
22 | true
23 | UTF-8
24 |
25 |
26 |
27 |
28 |
29 | com.infinilabs
30 | pinyin-core
31 | ${project.parent.version}
32 |
33 |
34 |
35 | org.elasticsearch
36 | elasticsearch
37 | ${elasticsearch.version}
38 | compile
39 |
40 |
41 |
42 |
43 |
44 |
45 | maven-assembly-plugin
46 | 3.6.0
47 |
48 | false
49 | ${project.build.directory}/releases/
50 |
51 | elasticsearch/src/main/assemblies/plugin.xml
52 |
53 |
54 |
55 |
56 | distro-assembly
57 | package
58 |
59 | single
60 |
61 |
62 |
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/elasticsearch/src/main/assemblies/plugin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | plugin
4 |
5 | zip
6 |
7 | false
8 |
9 |
10 | ${project.basedir}/../elasticsearch/src/main/resources/plugin-descriptor.properties
11 |
12 | true
13 |
14 |
15 |
16 |
17 | /
18 | true
19 | true
20 |
21 | org.elasticsearch:elasticsearch
22 |
23 |
24 |
25 | /
26 | true
27 | true
28 |
29 | org.apache.lucene:lucene-pinyin
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/elasticsearch/src/main/java/com/infinilabs/elasticsearch/analysis/AnalysisPinyinPlugin.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.elasticsearch.analysis;
2 |
3 |
4 | import org.apache.lucene.analysis.Analyzer;
5 | import org.elasticsearch.index.analysis.*;
6 | import org.elasticsearch.indices.analysis.AnalysisModule;
7 | import org.elasticsearch.plugins.AnalysisPlugin;
8 | import org.elasticsearch.plugins.Plugin;
9 |
10 | import java.util.Collections;
11 | import java.util.HashMap;
12 | import java.util.Map;
13 |
14 |
15 | public class AnalysisPinyinPlugin extends Plugin implements AnalysisPlugin {
16 | // Exposes the pinyin tokenizers, token filter and analyzer by name through the AnalysisPlugin hooks below. NOTE(review): the generic type arguments of the Map types appear to have been lost in this listing - confirm against the real source file.
17 | @Override
18 | public Map> getTokenizers() {
19 | Map> extra = new HashMap<>();
20 | extra.put("pinyin", PinyinTokenizerFactory::new); // tokenizer configured from the supplied settings
21 | extra.put("pinyin_first_letter", PinyinAbbreviationsTokenizerFactory::new); // tokenizer with a fixed first-letter-only configuration
22 | return extra;
23 | }
24 | // Token filters: a single entry, "pinyin", backed by PinyinTokenFilterFactory.
25 | @Override
26 | public Map> getTokenFilters() {
27 | Map> extra = new HashMap<>();
28 | extra.put("pinyin", PinyinTokenFilterFactory::new);
29 | return extra;
30 | }
31 | // Analyzers: a single entry, "pinyin", returned as an immutable singleton map.
32 | @Override
33 | public Map>> getAnalyzers() {
34 | return Collections.singletonMap("pinyin", PinyinAnalyzerProvider::new);
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/elasticsearch/src/main/java/com/infinilabs/elasticsearch/analysis/ESPinyinConfig.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.elasticsearch.analysis;
2 |
3 | import com.infinilabs.pinyin.analysis.PinyinConfig;
4 | import org.elasticsearch.common.settings.Settings;
5 |
6 | public class ESPinyinConfig extends PinyinConfig {
7 | public ESPinyinConfig() { // assigns nothing itself; fields keep whatever the PinyinConfig superclass initializes them to
8 | }
9 | // Populates every option from the given Settings; the second argument of each getter is the value used when the key is not set.
10 | public ESPinyinConfig(Settings settings) {
11 | this.keepFirstLetter = settings.getAsBoolean("keep_first_letter", true);
12 | this.keepSeparateFirstLetter = settings.getAsBoolean("keep_separate_first_letter", false);
13 | this.keepFullPinyin = settings.getAsBoolean("keep_full_pinyin", true);
14 | this.keepJoinedFullPinyin = settings.getAsBoolean("keep_joined_full_pinyin", false);
15 | this.keepNoneChinese = settings.getAsBoolean("keep_none_chinese", true);
16 | this.keepNoneChineseTogether = settings.getAsBoolean("keep_none_chinese_together", true);
17 | this.noneChinesePinyinTokenize = settings.getAsBoolean("none_chinese_pinyin_tokenize", true);
18 | this.keepOriginal = settings.getAsBoolean("keep_original", false);
19 | this.LimitFirstLetterLength = settings.getAsInt("limit_first_letter_length", 16); // the only integer option; the field name is spelled with a leading capital here
20 | this.lowercase = settings.getAsBoolean("lowercase", true);
21 | this.trimWhitespace = settings.getAsBoolean("trim_whitespace", true);
22 | this.keepNoneChineseInFirstLetter = settings.getAsBoolean("keep_none_chinese_in_first_letter", true);
23 | this.keepNoneChineseInJoinedFullPinyin = settings.getAsBoolean("keep_none_chinese_in_joined_full_pinyin", false);
24 | this.removeDuplicateTerm = settings.getAsBoolean("remove_duplicated_term", false); // note: the settings key says "duplicated" while the field says "Duplicate"
25 | this.fixedPinyinOffset = settings.getAsBoolean("fixed_pinyin_offset", false);
26 | this.ignorePinyinOffset = settings.getAsBoolean("ignore_pinyin_offset", true);
27 | this.keepSeparateChinese = settings.getAsBoolean("keep_separate_chinese", false);
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/elasticsearch/src/main/java/com/infinilabs/elasticsearch/analysis/PinyinAbbreviationsTokenizerFactory.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.elasticsearch.analysis;
2 |
3 | import com.infinilabs.pinyin.analysis.PinyinConfig;
4 | import com.infinilabs.pinyin.analysis.PinyinTokenizer;
5 | import org.apache.lucene.analysis.Tokenizer;
6 | import org.elasticsearch.common.settings.Settings;
7 | import org.elasticsearch.env.Environment;
8 | import org.elasticsearch.index.IndexSettings;
9 | import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
10 |
11 | public class PinyinAbbreviationsTokenizerFactory extends AbstractTokenizerFactory {
12 | // Tokenizer factory whose create() builds a PinyinTokenizer from a fixed, hard-coded configuration.
13 | public PinyinAbbreviationsTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
14 | super(name); // indexSettings, env and settings are accepted but not used by this constructor
15 | }
16 | // Builds a fresh config on every call: starts from the no-arg ESPinyinConfig and overrides the fields below.
17 | @Override
18 | public Tokenizer create() {
19 | PinyinConfig config=new ESPinyinConfig();
20 | config.keepFirstLetter=true;
21 | config.keepFullPinyin=false;
22 | config.keepNoneChinese=false;
23 | config.keepNoneChineseTogether=true;
24 | config.noneChinesePinyinTokenize=false;
25 | config.keepOriginal=false;
26 | config.lowercase=true;
27 | config.trimWhitespace=true;
28 | config.keepNoneChineseInFirstLetter=true;
29 | return new PinyinTokenizer(config);
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/elasticsearch/src/main/java/com/infinilabs/elasticsearch/analysis/PinyinAnalyzerProvider.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.elasticsearch.analysis;
2 |
3 | import com.infinilabs.pinyin.analysis.PinyinAnalyzer;
4 | import com.infinilabs.pinyin.analysis.PinyinConfig;
5 | import org.elasticsearch.common.settings.Settings;
6 | import org.elasticsearch.env.Environment;
7 | import org.elasticsearch.index.IndexSettings;
8 | import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
9 | import org.elasticsearch.injection.api.Inject;
10 |
11 |
12 | /**
13 | * Provider for the PinyinAnalyzer: builds one analyzer from the given settings at construction time and returns that same instance from get().
14 | */
15 | public class PinyinAnalyzerProvider extends AbstractIndexAnalyzerProvider {
16 | // NOTE(review): the superclass type argument appears to have been lost in this listing - confirm against the real source file.
17 | private final PinyinAnalyzer analyzer;
18 | private PinyinConfig config; // assigned once in the constructor and only read there
19 | // indexSettings and env are accepted but not used by this constructor; only name and settings are.
20 | @Inject
21 | public PinyinAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
22 | super(name);
23 | config=new ESPinyinConfig(settings);
24 | analyzer = new PinyinAnalyzer(config);
25 | }
26 | // Returns the analyzer created in the constructor; no new instance is built per call.
27 | @Override
28 | public PinyinAnalyzer get() {
29 | return this.analyzer;
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/elasticsearch/src/main/java/com/infinilabs/elasticsearch/analysis/PinyinTokenFilterFactory.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.elasticsearch.analysis;
2 |
3 |
4 | import com.infinilabs.pinyin.analysis.PinyinConfig;
5 | import com.infinilabs.pinyin.analysis.PinyinTokenFilter;
6 | import org.apache.lucene.analysis.TokenStream;
7 | import org.elasticsearch.common.settings.Settings;
8 | import org.elasticsearch.env.Environment;
9 | import org.elasticsearch.index.IndexSettings;
10 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
11 |
12 | public class PinyinTokenFilterFactory extends AbstractTokenFilterFactory {
13 | private PinyinConfig config;
14 | // Token filter factory: reads its options from Settings once, then wraps each incoming stream in a PinyinTokenFilter.
15 | // indexSettings and environment are accepted but not used by this constructor.
16 | public PinyinTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
17 | super(name);
18 | config=new ESPinyinConfig(settings);
19 | }
20 | // Every filter created here is handed the same config object built in the constructor.
21 | @Override
22 | public TokenStream create(TokenStream tokenStream) {
23 | return new PinyinTokenFilter(tokenStream, config);
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/elasticsearch/src/main/java/com/infinilabs/elasticsearch/analysis/PinyinTokenizerFactory.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.elasticsearch.analysis;
2 |
3 | import com.infinilabs.pinyin.analysis.PinyinConfig;
4 | import com.infinilabs.pinyin.analysis.PinyinTokenizer;
5 | import org.apache.lucene.analysis.Tokenizer;
6 | import org.elasticsearch.common.settings.Settings;
7 | import org.elasticsearch.env.Environment;
8 | import org.elasticsearch.index.IndexSettings;
9 | import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
10 |
11 | public class PinyinTokenizerFactory extends AbstractTokenizerFactory {
12 | // Tokenizer factory: reads its options from Settings once, then builds PinyinTokenizer instances from them.
13 | private PinyinConfig config;
14 | // indexSettings and env are accepted but not used by this constructor.
15 | public PinyinTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
16 | super(name);
17 | config=new ESPinyinConfig(settings);
18 | }
19 | // Every tokenizer created here is handed the same config object built in the constructor.
20 | @Override
21 | public Tokenizer create() {
22 | return new PinyinTokenizer(config);
23 | }
24 | }
25 |
26 |
--------------------------------------------------------------------------------
/elasticsearch/src/main/resources/plugin-descriptor.properties:
--------------------------------------------------------------------------------
1 | # Elasticsearch plugin descriptor file
2 | # This file must exist as 'plugin-descriptor.properties' at
3 | # the root directory of all plugins.
4 | #
5 | # A plugin can be 'site', 'jvm', or both.
6 | #
7 | ### example site plugin for "foo":
8 | #
9 | # foo.zip <-- zip file for the plugin, with this structure:
10 | # _site/ <-- the contents that will be served
11 | # plugin-descriptor.properties <-- example contents below:
12 | #
13 | # site=true
14 | # description=My cool plugin
15 | # version=1.0
16 | #
17 | ### example jvm plugin for "foo"
18 | #
19 | # foo.zip <-- zip file for the plugin, with this structure:
20 | # .jar <-- classes, resources, dependencies
21 | # .jar <-- any number of jars
22 | # plugin-descriptor.properties <-- example contents below:
23 | #
24 | # jvm=true
25 | # classname=foo.bar.BazPlugin
26 | # description=My cool plugin
27 | # version=2.0.0-rc1
28 | # elasticsearch.version=2.0
29 | # java.version=1.7
30 | #
31 | ### mandatory elements for all plugins:
32 | #
33 | # 'description': simple summary of the plugin
34 | description=${project.description}
35 | #
36 | # 'version': plugin's version
37 | version=${project.version}
38 | #
39 | # 'name': the plugin name
40 | name=${plugin.name}
41 |
42 | #
43 | # 'classname': the name of the class to load, fully-qualified.
44 | classname=${elasticsearch.plugin.classname}
45 | #
46 | # 'java.version' version of java the code is built against
47 | # use the system property java.specification.version
48 | # version string must be a sequence of nonnegative decimal integers
49 | # separated by "."'s and may have leading zeros
50 | java.version=${maven.compiler.target}
51 | #
52 | # 'elasticsearch.version' version of elasticsearch compiled against
53 | # You will have to release a new version of the plugin for each new
54 | # elasticsearch release. This version is checked when the plugin
55 | # is loaded so Elasticsearch will refuse to start in the presence of
56 | # plugins with the incorrect elasticsearch.version.
57 | elasticsearch.version=${elasticsearch.version}
58 |
--------------------------------------------------------------------------------
/opensearch/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | analysis-pinyin
7 | com.infinilabs
8 | 1.0
9 |
10 | 4.0.0
11 | opensearch-analysis-pinyin
12 | ${opensearch.version}
13 | Pinyin Analysis for OpenSearch
14 | jar
15 |
16 |
17 | 2.0.1
18 | 1.8
19 | analysis-pinyin
20 | com.infinilabs.opensearch.analysis.AnalysisPinyinPlugin
21 | UTF-8
22 |
23 |
24 |
25 |
26 | com.infinilabs
27 | pinyin-core
28 | ${project.parent.version}
29 |
30 |
31 | org.opensearch
32 | opensearch
33 | ${opensearch.version}
34 | compile
35 |
36 |
37 |
38 |
39 |
40 |
41 | maven-assembly-plugin
42 | 3.6.0
43 |
44 | false
45 | ${project.build.directory}/releases/
46 |
47 | opensearch/src/main/assemblies/plugin.xml
48 |
49 |
50 |
51 |
52 | distro-assembly
53 | package
54 |
55 | single
56 |
57 |
58 |
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/opensearch/src/main/assemblies/plugin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | plugin
4 |
5 | zip
6 |
7 | false
8 |
9 |
10 | ${project.basedir}/../opensearch/src/main/resources/plugin-descriptor.properties
11 |
12 | true
13 |
14 |
15 |
16 |
17 | /
18 | true
19 | true
20 |
21 | org.opensearch:opensearch
22 |
23 |
24 |
25 | /
26 | true
27 | true
28 |
29 | org.apache.lucene:lucene-pinyin
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/opensearch/src/main/java/com/infinilabs/opensearch/analysis/AnalysisPinyinPlugin.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.opensearch.analysis;
2 |
3 |
4 | import org.apache.lucene.analysis.Analyzer;
5 | import org.opensearch.index.analysis.*;
6 | import org.opensearch.indices.analysis.AnalysisModule;
7 | import org.opensearch.plugins.AnalysisPlugin;
8 | import org.opensearch.plugins.Plugin;
9 |
10 | import java.util.Collections;
11 | import java.util.HashMap;
12 | import java.util.Map;
13 |
14 |
15 | public class AnalysisPinyinPlugin extends Plugin implements AnalysisPlugin {
16 | // Exposes the pinyin tokenizers, token filter and analyzer by name through the AnalysisPlugin hooks below. NOTE(review): the generic type arguments of the Map types appear to have been lost in this listing - confirm against the real source file.
17 | @Override
18 | public Map> getTokenizers() {
19 | Map> extra = new HashMap<>();
20 | extra.put("pinyin", PinyinTokenizerFactory::new); // tokenizer configured from the supplied settings
21 | extra.put("pinyin_first_letter", PinyinAbbreviationsTokenizerFactory::new); // tokenizer with a fixed first-letter-only configuration
22 | return extra;
23 | }
24 | // Token filters: a single entry, "pinyin", backed by PinyinTokenFilterFactory.
25 | @Override
26 | public Map> getTokenFilters() {
27 | Map> extra = new HashMap<>();
28 | extra.put("pinyin", PinyinTokenFilterFactory::new);
29 | return extra;
30 | }
31 | // Analyzers: a single entry, "pinyin", returned as an immutable singleton map.
32 | @Override
33 | public Map>> getAnalyzers() {
34 | return Collections.singletonMap("pinyin", PinyinAnalyzerProvider::new);
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/opensearch/src/main/java/com/infinilabs/opensearch/analysis/ESPinyinConfig.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.opensearch.analysis;
2 |
3 | import com.infinilabs.pinyin.analysis.PinyinConfig;
4 | import org.opensearch.common.settings.Settings;
5 |
6 | public class ESPinyinConfig extends PinyinConfig {
7 | public ESPinyinConfig() { // assigns nothing itself; fields keep whatever the PinyinConfig superclass initializes them to
8 | }
9 | // Populates every option from the given OpenSearch Settings; the second argument of each getter is the value used when the key is not set.
10 | public ESPinyinConfig(Settings settings) {
11 | this.keepFirstLetter = settings.getAsBoolean("keep_first_letter", true);
12 | this.keepSeparateFirstLetter = settings.getAsBoolean("keep_separate_first_letter", false);
13 | this.keepFullPinyin = settings.getAsBoolean("keep_full_pinyin", true);
14 | this.keepJoinedFullPinyin = settings.getAsBoolean("keep_joined_full_pinyin", false);
15 | this.keepNoneChinese = settings.getAsBoolean("keep_none_chinese", true);
16 | this.keepNoneChineseTogether = settings.getAsBoolean("keep_none_chinese_together", true);
17 | this.noneChinesePinyinTokenize = settings.getAsBoolean("none_chinese_pinyin_tokenize", true);
18 | this.keepOriginal = settings.getAsBoolean("keep_original", false);
19 | this.LimitFirstLetterLength = settings.getAsInt("limit_first_letter_length", 16); // the only integer option; the field name is spelled with a leading capital here
20 | this.lowercase = settings.getAsBoolean("lowercase", true);
21 | this.trimWhitespace = settings.getAsBoolean("trim_whitespace", true);
22 | this.keepNoneChineseInFirstLetter = settings.getAsBoolean("keep_none_chinese_in_first_letter", true);
23 | this.keepNoneChineseInJoinedFullPinyin = settings.getAsBoolean("keep_none_chinese_in_joined_full_pinyin", false);
24 | this.removeDuplicateTerm = settings.getAsBoolean("remove_duplicated_term", false); // note: the settings key says "duplicated" while the field says "Duplicate"
25 | this.fixedPinyinOffset = settings.getAsBoolean("fixed_pinyin_offset", false);
26 | this.ignorePinyinOffset = settings.getAsBoolean("ignore_pinyin_offset", true);
27 | this.keepSeparateChinese = settings.getAsBoolean("keep_separate_chinese", false);
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/opensearch/src/main/java/com/infinilabs/opensearch/analysis/PinyinAbbreviationsTokenizerFactory.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.opensearch.analysis;
2 |
3 | import com.infinilabs.pinyin.analysis.PinyinConfig;
4 | import com.infinilabs.pinyin.analysis.PinyinTokenizer;
5 | import org.apache.lucene.analysis.Tokenizer;
6 | import org.opensearch.common.settings.Settings;
7 | import org.opensearch.env.Environment;
8 | import org.opensearch.index.IndexSettings;
9 | import org.opensearch.index.analysis.AbstractTokenizerFactory;
10 |
11 | public class PinyinAbbreviationsTokenizerFactory extends AbstractTokenizerFactory {
12 | // Tokenizer factory whose create() builds a PinyinTokenizer from a fixed, hard-coded configuration.
13 | public PinyinAbbreviationsTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
14 | super(indexSettings, settings, name); // env is accepted but not used; settings is only forwarded to the superclass, never read for pinyin options
15 | }
16 | // Builds a fresh config on every call: starts from the no-arg ESPinyinConfig and overrides the fields below.
17 | @Override
18 | public Tokenizer create() {
19 | PinyinConfig config=new ESPinyinConfig();
20 | config.keepFirstLetter=true;
21 | config.keepFullPinyin=false;
22 | config.keepNoneChinese=false;
23 | config.keepNoneChineseTogether=true;
24 | config.noneChinesePinyinTokenize=false;
25 | config.keepOriginal=false;
26 | config.lowercase=true;
27 | config.trimWhitespace=true;
28 | config.keepNoneChineseInFirstLetter=true;
29 | return new PinyinTokenizer(config);
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/opensearch/src/main/java/com/infinilabs/opensearch/analysis/PinyinAnalyzerProvider.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.opensearch.analysis;
2 |
3 | import com.infinilabs.pinyin.analysis.PinyinAnalyzer;
4 | import com.infinilabs.pinyin.analysis.PinyinConfig;
5 | import org.opensearch.common.inject.Inject;
6 | import org.opensearch.common.settings.Settings;
7 | import org.opensearch.env.Environment;
8 | import org.opensearch.index.IndexSettings;
9 | import org.opensearch.index.analysis.AbstractIndexAnalyzerProvider;
10 |
11 | /**
12 | * Provider for the PinyinAnalyzer: builds one analyzer from the given settings at construction time and returns that same instance from get().
13 | */
14 | public class PinyinAnalyzerProvider extends AbstractIndexAnalyzerProvider {
15 | // NOTE(review): the superclass type argument appears to have been lost in this listing - confirm against the real source file.
16 | private final PinyinAnalyzer analyzer;
17 | private PinyinConfig config; // assigned once in the constructor and only read there
18 | // env is accepted but not used by this constructor.
19 | @Inject
20 | public PinyinAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
21 | super(indexSettings, name, settings);
22 | config = new ESPinyinConfig(settings);
23 | analyzer = new PinyinAnalyzer(config);
24 | }
25 | // Returns the analyzer created in the constructor; no new instance is built per call.
26 | @Override
27 | public PinyinAnalyzer get() {
28 | return this.analyzer;
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/opensearch/src/main/java/com/infinilabs/opensearch/analysis/PinyinTokenFilterFactory.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.opensearch.analysis;
2 |
3 |
4 | import com.infinilabs.pinyin.analysis.PinyinConfig;
5 | import com.infinilabs.pinyin.analysis.PinyinTokenFilter;
6 | import org.apache.lucene.analysis.TokenStream;
7 | import org.opensearch.common.settings.Settings;
8 | import org.opensearch.env.Environment;
9 | import org.opensearch.index.IndexSettings;
10 | import org.opensearch.index.analysis.AbstractTokenFilterFactory;
11 |
12 | public class PinyinTokenFilterFactory extends AbstractTokenFilterFactory {
13 | private PinyinConfig config;
14 | // Token filter factory: reads its options from Settings once, then wraps each incoming stream in a PinyinTokenFilter.
15 | // environment is accepted but not used by this constructor.
16 | public PinyinTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
17 | super(indexSettings, name, settings);
18 | config = new ESPinyinConfig(settings);
19 | }
20 | // Every filter created here is handed the same config object built in the constructor.
21 | @Override
22 | public TokenStream create(TokenStream tokenStream) {
23 | return new PinyinTokenFilter(tokenStream, config);
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/opensearch/src/main/java/com/infinilabs/opensearch/analysis/PinyinTokenizerFactory.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.opensearch.analysis;
2 |
3 | import com.infinilabs.pinyin.analysis.PinyinConfig;
4 | import com.infinilabs.pinyin.analysis.PinyinTokenizer;
5 | import org.apache.lucene.analysis.Tokenizer;
6 | import org.opensearch.common.settings.Settings;
7 | import org.opensearch.env.Environment;
8 | import org.opensearch.index.IndexSettings;
9 | import org.opensearch.index.analysis.AbstractTokenizerFactory;
10 |
11 | public class PinyinTokenizerFactory extends AbstractTokenizerFactory {
12 |
13 | private PinyinConfig config;
14 |
15 | public PinyinTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
16 | super(indexSettings, settings, name);
17 | config = new ESPinyinConfig(settings);
18 | }
19 |
20 | @Override
21 | public Tokenizer create() {
22 | return new PinyinTokenizer(config);
23 | }
24 | }
25 |
26 |
--------------------------------------------------------------------------------
/opensearch/src/main/resources/plugin-descriptor.properties:
--------------------------------------------------------------------------------
1 | #
2 | # SPDX-License-Identifier: Apache-2.0
3 | #
4 | # The OpenSearch Contributors require contributions made to
5 | # this file be licensed under the Apache-2.0 license or a
6 | # compatible open source license.
7 | #
8 | # Modifications Copyright OpenSearch Contributors. See
9 | # GitHub history for details.
10 | #
11 |
12 | # OpenSearch plugin descriptor file
13 | # This file must exist as 'plugin-descriptor.properties' inside a plugin.
14 | #
15 | ### example plugin for "foo"
16 | #
17 | # foo.zip <-- zip file for the plugin, with this structure:
18 | # |____ <arbitrary name1>.jar <-- classes, resources, dependencies
19 | # |____ <arbitrary name2>.jar <-- any number of jars
20 | # |____ plugin-descriptor.properties <-- example contents below:
21 | #
22 | # classname=foo.bar.BazPlugin
23 | # description=My cool plugin
24 | # version=6.0
25 | # opensearch.version=6.0
26 | # java.version=1.8
27 | #
28 | ### mandatory elements for all plugins:
29 | #
30 | # 'description': simple summary of the plugin
31 | description=${description}
32 | #
33 | # 'version': plugin's version
34 | version=${project.version}
35 | #
36 | # 'name': the plugin name
37 | name=${plugin.name}
38 | #
39 | # 'classname': the name of the class to load, fully-qualified
40 | classname=${opensearch.plugin.classname}
41 | #
42 | # 'java.version': version of java the code is built against
43 | # use the system property java.specification.version
44 | # version string must be a sequence of nonnegative decimal integers
45 | # separated by "."'s and may have leading zeros
46 | java.version=${maven.compiler.target}
47 | #
48 | # 'opensearch.version': semantic version of opensearch the plugin is compatible with
49 | # does not include -SNAPSHOT if compiled against a snapshot build
50 | opensearch.version=${opensearch.version}
51 | #
52 | ### optional elements for plugins:
53 | #
54 | # 'custom.foldername': the custom name of the folder in which the plugin is installed
55 | custom.foldername=
56 | #
57 | # 'extended.plugins': other plugins this plugin extends through SPI
58 | extended.plugins=
59 | #
60 | # 'has.native.controller': whether or not the plugin has a native controller
61 | has.native.controller=
62 |
--------------------------------------------------------------------------------
/pinyin-core/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | analysis-pinyin
7 | com.infinilabs
8 | 1.0
9 |
10 | 4.0.0
11 |
12 | pinyin-core
13 | jar
14 |
15 |
16 | 1.8
17 | 1.8
18 | UTF-8
19 |
20 |
21 |
22 |
23 | org.apache.lucene
24 | lucene-core
25 | provided
26 | ${lucene.version}
27 |
28 |
29 | org.apache.lucene
30 | lucene-analysis-common
31 | provided
32 | ${lucene.version}
33 |
34 |
35 |
36 | org.slf4j
37 | slf4j-api
38 | 1.7.7
39 | provided
40 |
41 |
42 |
43 | commons-logging
44 | commons-logging
45 | 1.2
46 | provided
47 |
48 |
49 |
50 | log4j
51 | log4j
52 | 1.2.17
53 | provided
54 |
55 |
56 | org.apache.logging.log4j
57 | log4j-api
58 | 2.5
59 | provided
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/com/infinilabs/pinyin/analysis/ChineseUtil.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.pinyin.analysis;
2 |
3 | import org.nlpcn.commons.lang.util.StringUtil;
4 |
5 | import java.util.ArrayList;
6 | import java.util.Collections;
7 | import java.util.LinkedList;
8 | import java.util.List;
9 |
10 | public class ChineseUtil {
11 | /**
12 | * 汉字始
13 | */
14 | public static char CJK_UNIFIED_IDEOGRAPHS_START = '\u4E00';
15 | /**
16 | * 汉字止
17 | */
18 | public static char CJK_UNIFIED_IDEOGRAPHS_END = '\u9FA5';
19 |
20 | public static List segmentChinese(String str){
21 | if (StringUtil.isBlank(str)) {
22 | return Collections.emptyList();
23 | }
24 |
25 | List lists = str.length()<=32767?new ArrayList<>(str.length()):new LinkedList<>();
26 | for (int i=0;i=CJK_UNIFIED_IDEOGRAPHS_START&&c<=CJK_UNIFIED_IDEOGRAPHS_END){
29 | lists.add(String.valueOf(c));
30 | }
31 | else{
32 | lists.add(null);
33 | }
34 |
35 | }
36 | return lists;
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/com/infinilabs/pinyin/analysis/ConfigErrorException.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.pinyin.analysis;
2 |
3 | /**
4 | * Created by medcl on 16/8/22.
5 | */
/**
 * Unchecked exception that carries a configuration error message.
 */
public class ConfigErrorException extends RuntimeException {

    /**
     * @param message description of the configuration problem; returned by {@link #getMessage()}
     */
    public ConfigErrorException(String message) {
        // Hand the message to Throwable instead of shadowing it in a private field, so the
        // inherited getMessage()/toString() work without an override.
        super(message);
    }
}
16 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/com/infinilabs/pinyin/analysis/PinyinAlphabetTokenizer.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.pinyin.analysis;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.InputStream;
5 | import java.io.InputStreamReader;
6 | import java.util.*;
7 |
8 | /**
9 | * Created by medcl on 16/10/13.
10 | */
11 | public class PinyinAlphabetTokenizer {
12 |
13 | private static final int PINYIN_MAX_LENGTH = 6;
14 |
15 | public static List walk(String text) {
16 | return segPinyinStr(text);
17 | }
18 |
19 | private static List segPinyinStr(String content) {
20 | String pinyinStr = content;
21 | pinyinStr = pinyinStr.toLowerCase();
22 | // 按非letter切分
23 | List pinyinStrList = splitByNoletter(pinyinStr);
24 | List pinyinList = new ArrayList<>();
25 | for (String pinyinText : pinyinStrList) {
26 | if (pinyinText.length() == 1) {
27 | pinyinList.add(pinyinText);
28 | } else {
29 | List forward = positiveMaxMatch(pinyinText, PINYIN_MAX_LENGTH);
30 | if (forward.size() == 1) { // 前向只切出1个的话,没有必要再做逆向分词
31 | pinyinList.addAll(forward);
32 | } else {
33 | // 分别正向、逆向最大匹配,选出最短的作为最优结果
34 | List backward = reverseMaxMatch(pinyinText, PINYIN_MAX_LENGTH);
35 | if (forward.size() <= backward.size()) {
36 | pinyinList.addAll(forward);
37 | } else {
38 | pinyinList.addAll(backward);
39 | }
40 | }
41 | }
42 | }
43 | return pinyinList;
44 | }
45 |
46 | private static List splitByNoletter(String pinyinStr) {
47 | List pinyinStrList = new ArrayList<>();
48 | StringBuffer sb = new StringBuffer();
49 | boolean lastWord = true;
50 | for (char c : pinyinStr.toCharArray()) {
51 | if ((c > 96 && c < 123) || (c > 64 && c < 91)) {
52 | if (!lastWord){
53 | pinyinStrList.add(sb.toString());
54 | sb.setLength(0);
55 | }
56 | sb.append(c);
57 | lastWord = true;
58 | } else {
59 | if (lastWord & sb.length()>0) {
60 | pinyinStrList.add(sb.toString());
61 | sb.setLength(0);
62 | }
63 | sb.append(c);
64 | lastWord = false;
65 | }
66 | }
67 | if (sb.length() > 0) {
68 | pinyinStrList.add(sb.toString());
69 | }
70 | return pinyinStrList;
71 |
72 | }
73 |
74 | private static List positiveMaxMatch(String pinyinText, int maxLength) {
75 |
76 | List pinyinList = new ArrayList<>();
77 | StringBuffer noMatchBuffer = new StringBuffer();
78 | for (int start = 0; start < pinyinText.length(); ) {
79 | int end = start + maxLength;
80 | if (end > pinyinText.length()) {
81 | end = pinyinText.length();
82 | }
83 | if (start == end) {
84 | break;
85 | }
86 | String sixStr = pinyinText.substring(start, end);
87 | boolean match = false;
88 | for (int j = 0; j < sixStr.length(); j++) {
89 | String guess = sixStr.substring(0, sixStr.length() - j);
90 | if (PinyinAlphabetDict.getInstance().match(guess)) {
91 | pinyinList.add(guess);
92 | start += guess.length();
93 | match = true;
94 | break;
95 | }
96 | }
97 | if (!match) { //没命中,向后移动一位
98 | noMatchBuffer.append(sixStr.substring(0, 1));
99 | start++;
100 | }else { // 命中,加上之前没命中的,并清空
101 | if (noMatchBuffer.length() > 0) {
102 | pinyinList.add(noMatchBuffer.toString());
103 | noMatchBuffer.setLength(0);
104 | }
105 | }
106 | }
107 | if (noMatchBuffer.length() > 0) {
108 | pinyinList.add(noMatchBuffer.toString());
109 | noMatchBuffer.setLength(0);
110 | }
111 |
112 | return pinyinList;
113 | }
114 |
115 | private static List reverseMaxMatch(String pinyinText, int maxLength) {
116 | List pinyinList = new ArrayList<>();
117 | StringBuffer noMatchBuffer = new StringBuffer();
118 | for (int end = pinyinText.length(); end >= 0; ) {
119 | int start = end - maxLength;
120 | if (start < 0) {
121 | start = 0;
122 | }
123 | if (start == end) {
124 | break;
125 | }
126 | boolean match = false;
127 | String sixStr = pinyinText.substring(start, end);
128 | for (int j = 0; j < sixStr.length(); j++) {
129 | String guess = sixStr.substring(j);
130 | if (PinyinAlphabetDict.getInstance().match(guess)) {
131 | pinyinList.add(guess);
132 | end -= guess.length();
133 | match = true;
134 | break;
135 | }
136 | }
137 | if (!match) { //一个也没命中
138 | noMatchBuffer.append(sixStr.substring(sixStr.length() - 1));
139 | end--;
140 | } else {
141 | if (noMatchBuffer.length() > 0) {
142 | pinyinList.add(noMatchBuffer.toString());
143 | noMatchBuffer.setLength(0);
144 | }
145 | }
146 | }
147 |
148 | if (noMatchBuffer.length() > 0) {
149 | pinyinList.add(noMatchBuffer.toString());
150 | noMatchBuffer.setLength(0);
151 | }
152 | // reverse 保持切词顺序
153 | Collections.reverse(pinyinList);
154 | return pinyinList;
155 | }
156 |
157 |
158 | }
159 |
/**
 * Lazily created singleton holding the set of entries read from the
 * {@code /pinyin_alphabet.dict} classpath resource (one entry per non-blank line).
 */
class PinyinAlphabetDict {

    private static final String fileName = "/pinyin_alphabet.dict";

    private final Set<String> alphabet = new HashSet<>();

    // volatile is required for the double-checked initialisation in getInstance():
    // without it another thread could observe a partially constructed instance.
    private static volatile PinyinAlphabetDict instance;

    /**
     * Loads the dictionary resource.
     *
     * @throws IllegalStateException if the resource is not on the classpath
     * @throws RuntimeException      if reading the resource fails
     */
    private PinyinAlphabetDict() {
        InputStream in = PinyinAlphabetDict.class.getResourceAsStream(fileName);
        if (in == null) {
            // Fail with a message naming the resource instead of an opaque NullPointerException.
            throw new IllegalStateException("pinyin dictionary resource not found: " + fileName);
        }
        // Decode explicitly as UTF-8 so loading does not depend on the platform default charset.
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() > 0) {
                    alphabet.add(line);
                }
            }
        } catch (Exception ex) {
            throw new RuntimeException("read pinyin dic error.", ex);
        } finally {
            try {
                reader.close();
            } catch (Exception ignored) {
                // Best effort: the entries are already loaded (or a read error is propagating).
            }
        }
    }

    /**
     * @return the shared dictionary, loading it on first use
     */
    public static PinyinAlphabetDict getInstance() {
        PinyinAlphabetDict local = instance;
        if (local == null) {
            synchronized (PinyinAlphabetDict.class) {
                local = instance;
                if (local == null) {
                    local = new PinyinAlphabetDict();
                    instance = local;
                }
            }
        }
        return local;
    }

    /**
     * @param c candidate string
     * @return {@code true} if {@code c} is exactly one of the loaded dictionary lines
     */
    public boolean match(String c) {
        return alphabet.contains(c);
    }
}
203 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/com/infinilabs/pinyin/analysis/PinyinAnalyzer.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.pinyin.analysis;
2 |
3 | import org.apache.lucene.analysis.Analyzer;
4 |
5 | /**
6 | * Created by IntelliJ IDEA.
7 | * User: Medcl'
8 | * Date: 12-5-22
9 | * Time: 上午10:39
10 | */
11 | public final class PinyinAnalyzer extends Analyzer {
12 |
13 | private PinyinConfig config;
14 |
15 | public PinyinAnalyzer(PinyinConfig config) {
16 | this.config=config;
17 | }
18 |
19 | @Override
20 | protected TokenStreamComponents createComponents(String fieldName) {
21 | return new TokenStreamComponents(new PinyinTokenizer(config));
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/com/infinilabs/pinyin/analysis/PinyinConfig.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.pinyin.analysis;
2 |
3 | /**
4 | * Created by medcl on 15/11/26.
5 | */
/**
 * Plain holder for the pinyin analysis options. Every option is a public mutable field
 * initialised to its default value below.
 */
public class PinyinConfig {

    // ---- options that default to true ----
    public boolean lowercase = true;
    public boolean trimWhitespace = true;
    public boolean keepNoneChinese = true;
    public boolean keepNoneChineseInFirstLetter = true;
    public boolean keepFirstLetter = true;
    public boolean keepNoneChineseTogether = true;
    public boolean noneChinesePinyinTokenize = true;
    public boolean keepFullPinyin = true;

    /**
     * Since 6.0 offsets are strictly constrained and overlapping tokens are not allowed.
     * With this option enabled, overlapping tokens are permitted by ignoring offsets.
     * Note that every position-related query or highlight then becomes incorrect; use
     * multi-fields with different settings for different query purposes. Set it to
     * {@code false} if offsets are needed. Default: {@code true}.
     */
    public boolean ignorePinyinOffset = true;

    // ---- options that default to false ----
    public boolean keepNoneChineseInJoinedFullPinyin = false;
    public boolean keepOriginal = false;
    public boolean keepSeparateFirstLetter = false;
    public boolean keepJoinedFullPinyin = false;
    public boolean removeDuplicateTerm = false;
    public boolean fixedPinyinOffset = false;
    public boolean keepSeparateChinese = false;

    // ---- numeric options ----
    /** Default: 16. */
    public int LimitFirstLetterLength = 16;

}
29 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/com/infinilabs/pinyin/analysis/TermItem.java:
--------------------------------------------------------------------------------
1 | package com.infinilabs.pinyin.analysis;
2 |
3 | /**
4 | * Created by IntelliJ IDEA.
5 | * User: Medcl'
6 | * Date: 12-5-21
7 | * Time: 下午5:53
8 | */
9 |
/**
 * A term together with its start/end offsets and position. Items are ordered by
 * {@link #position} only.
 */
public class TermItem implements Comparable<TermItem> {
    String term;
    int startOffset;
    int endOffset;
    int position;

    /**
     * @param term        the term text
     * @param startOffset start offset recorded for the term
     * @param endOffset   end offset recorded for the term
     * @param position    position used for ordering
     */
    public TermItem(String term, int startOffset, int endOffset, int position) {
        this.term = term;
        this.startOffset = startOffset;
        this.endOffset = endOffset;
        this.position = position;
    }

    /**
     * @return the term text
     */
    @Override
    public String toString() {
        return term;
    }

    /**
     * Orders items by ascending position.
     *
     * @param o the item to compare with
     * @return negative, zero or positive as this position is less than, equal to or greater than {@code o}'s
     */
    @Override
    public int compareTo(TermItem o) {
        // Integer.compare avoids the overflow that "this.position - o.position" suffers
        // when the two positions are far apart.
        return Integer.compare(this.position, o.position);
    }
}
32 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/pinyin/CaseType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * File : CaseType.java
3 | * Created : 2014年1月22日
4 | * By : luhuiguo
5 | */
6 | package org.nlpcn.commons.lang.pinyin;
7 |
8 | /**
9 | * Define the output case of Pinyin string
10 | *
11 | *
12 | * This class provides several options for outputted cases of Pinyin string,
13 | * which are listed below. For example, Chinese character '民'
14 | *
15 | *
First, look for an "a" or an "e". If either vowel appears, it takes
106 | * the tone mark. There are no possible pinyin syllables that contain both
107 | * an "a" and an "e".
108 | *
109 | *
If there is no "a" or "e", look for an "ou". If "ou" appears, then
110 | * the "o" takes the tone mark.
111 | *
112 | *
If none of the above cases hold, then the last vowel in the syllable
113 | * takes the tone mark.
114 | *
115 | *
12 | * Chinese has four pitched tones and a "toneless" tone. They are called Píng(平,
13 | * flat), Shǎng(上, rise), Qù(去, high drop), Rù(入, drop) and Qing(轻, toneless).
14 | * Usually, we use 1, 2, 3, 4 and 5 to represent them.
15 | *
16 | *
17 | * This class provides several options for output of Chinese tones, which are
18 | * listed below. For example, Chinese character '打'
19 | *
20 | *
12 | * 'ü' is a special character of Hanyu Pinyin, which can not be simply
13 | * represented by English letters. In Hanyu Pinyin, such characters include 'ü',
14 | * 'üe', 'üan', and 'ün'.
15 | *
16 | *
17 | * This class provides several options for output of 'ü', which are listed
18 | * below.
19 | *
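20 | * <table>
21 | * <tr>
22 | * <th>Options</th>
23 | * <th>Output</th>
24 | * </tr>
25 | * <tr>
26 | * <td>WITH_U_AND_COLON</td>
27 | * <td>u:</td>
28 | * </tr>
29 | * <tr>
30 | * <td>WITH_V</td>
31 | * <td>v</td>
32 | * </tr>
33 | * <tr>
34 | * <td>WITH_U_UNICODE</td>
35 | * <td>ü</td>
36 | * </tr>
37 | * </table>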
38 | *
39 | * @author luhuiguo
40 | */
public enum YuCharType {

    /** Render 'ü' as the two characters "u:". */
    WITH_U_AND_COLON,

    /** Render 'ü' as the single letter "v". */
    WITH_V,

    /** Render 'ü' as the Unicode character "ü" itself. */
    WITH_U_UNICODE

}
59 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/tire/GetWord.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.tire;
2 |
3 | import org.nlpcn.commons.lang.tire.domain.Forest;
4 |
5 | /**
6 | * 基本的string【】 类
7 | *
8 | * @author ansj
9 | *
10 | */
11 | public class GetWord extends SmartGetWord {
12 |
13 | public GetWord(Forest forest, char[] chars) {
14 | super(forest, chars);
15 | }
16 |
17 | public GetWord(Forest forest, String content) {
18 | super(forest, content);
19 | }
20 |
21 | public String getParam(int i) {
22 | final String[] param = this.getParam();
23 | if (param == null || i >= param.length) {
24 | return null;
25 | } else {
26 | return param[i];
27 | }
28 | }
29 |
30 | public String[] getParams() {
31 | return this.getParam();
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/tire/SmartGetWord.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.tire;
2 |
3 | import org.nlpcn.commons.lang.tire.domain.SmartForest;
4 |
5 | public class SmartGetWord {
6 | private static final String EMPTYSTRING = "";
7 | public int offe;
8 | byte status = 0;
9 | int root = 0;
10 | int i = this.root;
11 | boolean isBack = false;
12 | private SmartForest forest;
13 | private char[] chars;
14 | private String str;
15 | private int tempOffe;
16 | private T param;
17 | private SmartForest branch;
18 |
19 | public SmartGetWord(SmartForest forest, String content) {
20 | this.chars = content.toCharArray();
21 | this.forest = forest;
22 | this.branch = forest;
23 | }
24 |
25 | public SmartGetWord(SmartForest forest, char[] chars) {
26 | this.chars = chars;
27 | this.forest = forest;
28 | this.branch = forest;
29 | }
30 |
31 | public String getAllWords() {
32 | String temp = this.allWords();
33 |
34 | temp = checkNumberOrEnglish(temp);
35 |
36 | while (EMPTYSTRING.equals(temp)) {
37 | temp = this.allWords();
38 | temp = checkNumberOrEnglish(temp);
39 | }
40 | return temp;
41 | }
42 |
43 | /**
44 | * 验证一个词语的左右边.不是英文和数字
45 | *
46 | * @param temp
47 | * @return
48 | */
49 | private String checkNumberOrEnglish(String temp) {
50 |
51 | if (temp == null || temp == EMPTYSTRING) {
52 | return temp;
53 | }
54 |
55 | // 先验证最左面
56 |
57 | char l = temp.charAt(0);
58 |
59 | if (l < 127 && offe > 0) {
60 | if (checkSame(l, chars[offe - 1])) {
61 | return EMPTYSTRING;
62 | }
63 | }
64 |
65 | char r = l;
66 |
67 | if (temp.length() > 1) {
68 | r = temp.charAt(temp.length() - 1);
69 | }
70 |
71 | if (r < 127 && (offe + temp.length()) < chars.length) {
72 | if (checkSame(r, chars[offe + temp.length()])) {
73 | return EMPTYSTRING;
74 | }
75 | }
76 |
77 | return temp;
78 | }
79 |
80 | /**
81 | * 验证两个char是否都是数字或者都是英文
82 | *
83 | * @param l
84 | * @param c
85 | * @return
86 | */
87 | private boolean checkSame(char l, char c) {
88 |
89 | if (isE(l) && isE(c)) {
90 | return true;
91 | }
92 |
93 | if (isNum(l) && isNum(c)) {
94 | return true;
95 | }
96 |
97 | return false;
98 | }
99 |
100 | public String getFrontWords() {
101 | String temp = null;
102 | do {
103 | temp = this.frontWords();
104 | temp = checkNumberOrEnglish(temp);
105 | } while (EMPTYSTRING.equals(temp));
106 | return temp;
107 | }
108 |
109 | private Integer tempJLen = null;
110 |
111 | private String allWords() {
112 |
113 | for (; i < chars.length;) {
114 | if (tempJLen == null) {
115 | branch = branch.getBranch(chars[i]);
116 | }
117 | if (branch == null) {
118 | branch = forest;
119 | i++;
120 | continue;
121 | }
122 |
123 | for (int j = i + (tempJLen == null ? 0 : tempJLen); j < chars.length; j++) {
124 | if (j > i) {
125 | branch = branch.getBranch(chars[j]);
126 | }
127 | if (branch == null) {
128 | branch = forest;
129 | i++;
130 | tempJLen = null;
131 | return EMPTYSTRING;
132 | }
133 |
134 | switch (branch.getStatus()) {
135 | case 2:
136 | offe = i;
137 | param = branch.getParam();
138 | tempJLen = j - i + 1;
139 | return new String(chars, i, j - i + 1);
140 | case 3:
141 | offe = i;
142 | param = branch.getParam();
143 | branch = forest;
144 | tempJLen = null;
145 | i++;
146 | return new String(chars, i - 1, j - i + 2);
147 | }
148 |
149 | }
150 |
151 | i++;
152 | branch = forest;
153 | tempJLen = null;
154 | return EMPTYSTRING;
155 |
156 | }
157 |
158 | return null;
159 |
160 | }
161 |
162 | private String frontWords() {
163 | for (; this.i < this.chars.length + 1; this.i++) {
164 | if (i == chars.length) {
165 | this.branch = null;
166 | } else {
167 | this.branch = this.branch.getBranch(this.chars[this.i]);
168 | }
169 | if (this.branch == null) {
170 | this.branch = this.forest;
171 | if (this.isBack) {
172 | this.offe = this.root;
173 | this.str = new String(this.chars, this.root, this.tempOffe);
174 | if (this.str.length() == 0) {
175 | this.root += 1;
176 | this.i = this.root;
177 | } else {
178 | this.i = (this.root + this.tempOffe);
179 | this.root = this.i;
180 | }
181 | this.isBack = false;
182 | return this.str;
183 | }
184 | this.i = this.root;
185 | this.root += 1;
186 | } else {
187 | switch (this.branch.getStatus()) {
188 | case 2:
189 | this.isBack = true;
190 | this.tempOffe = (this.i - this.root + 1);
191 | this.param = this.branch.getParam();
192 | break;
193 | case 3:
194 | this.offe = this.root;
195 | this.str = new String(this.chars, this.root, this.i - this.root + 1);
196 | String temp = this.str;
197 | this.param = this.branch.getParam();
198 | this.branch = this.forest;
199 | this.isBack = false;
200 | if (temp.length() > 0) {
201 | this.i += 1;
202 | this.root = this.i;
203 | } else {
204 | this.i = (this.root + 1);
205 | }
206 | return this.str;
207 | }
208 | }
209 | }
210 | this.tempOffe += this.chars.length;
211 | return null;
212 | }
213 |
214 | public boolean isE(char c) {
215 | if ((c >= 'A') && (c <= 'z')) {
216 | return true;
217 | }
218 | return false;
219 | }
220 |
221 | public boolean isNum(char c) {
222 | if ((c >= '0') && (c <= '9')) {
223 | return true;
224 | }
225 | return false;
226 | }
227 |
228 | public void reset(String content) {
229 | this.offe = 0;
230 | this.status = 0;
231 | this.root = 0;
232 | this.i = this.root;
233 | this.isBack = false;
234 | this.tempOffe = 0;
235 | this.chars = content.toCharArray();
236 | this.branch = this.forest;
237 | }
238 |
239 | /**
240 | * 参数
241 | *
242 | * @return
243 | */
244 | public T getParam() {
245 | return this.param;
246 | }
247 |
248 | }
249 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/tire/domain/Forest.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.tire.domain;
2 |
3 | import org.nlpcn.commons.lang.tire.GetWord;
4 |
5 | public class Forest extends SmartForest {
6 |
7 | private static final long serialVersionUID = -4616310486272978650L;
8 |
9 | public Forest() {
10 | };
11 |
12 | public Forest(char c, int status, String[] param) {
13 | super(c, status, param);
14 | }
15 |
16 | public SmartForest get(char c) {
17 | return this.getBranch(c);
18 | }
19 |
20 | public SmartForest getBranch(char c) {
21 | return super.getBranch(c);
22 | }
23 |
24 | public GetWord getWord(String str) {
25 | return getWord(str.toCharArray());
26 | }
27 |
28 | public GetWord getWord(char[] chars) {
29 | return new GetWord(this, chars);
30 | }
31 |
32 | public String[] getParams() {
33 | return this.getParam();
34 | }
35 |
36 | }
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/tire/domain/Value.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.tire.domain;
2 |
3 | import java.util.Arrays;
4 |
/**
 * A dictionary entry: a keyword plus zero or more string parameters. Its text form is
 * the keyword followed by the parameters, all separated by tab characters.
 */
public class Value {
    private static final String TAB = "\t";
    /** Shared stand-in for "no parameters", so {@link #paramers} is never {@code null}. */
    private static final String[] NO_PARAMERS = new String[0];

    private String keyword;
    private String[] paramers = NO_PARAMERS;

    /**
     * @param keyword  the entry keyword
     * @param paramers the entry parameters; {@code null} is treated as "none"
     */
    public Value(String keyword, String... paramers) {
        this.keyword = keyword;
        if (paramers != null) {
            this.paramers = paramers;
        }
    }

    /**
     * Parses a tab-separated line: the first field is the keyword, the rest are parameters.
     *
     * @param temp the line to parse
     */
    public Value(String temp) {
        String[] strs = temp.split(TAB);
        this.keyword = strs[0];
        if (strs.length > 1) {
            this.paramers = Arrays.copyOfRange(strs, 1, strs.length);
        }
    }

    public String getKeyword() {
        return keyword;
    }

    public void setKeyword(String keyword) {
        this.keyword = keyword;
    }

    /**
     * @return the parameters; never {@code null}
     */
    public String[] getParamers() {
        return paramers;
    }

    /**
     * @param paramers the new parameters; {@code null} is treated as "none", matching the
     *                 constructor, so {@link #toString()} cannot fail afterwards
     */
    public void setParamers(String[] paramers) {
        this.paramers = (paramers == null) ? NO_PARAMERS : paramers;
    }

    /**
     * @return the keyword followed by each parameter, tab-separated
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(keyword);
        for (String paramer : paramers) {
            sb.append(TAB).append(paramer);
        }
        return sb.toString();
    }

}
53 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/tire/library/Library.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.tire.library;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.FileInputStream;
5 | import java.io.InputStream;
6 | import java.util.List;
7 |
8 | import org.nlpcn.commons.lang.tire.domain.Forest;
9 | import org.nlpcn.commons.lang.tire.domain.SmartForest;
10 | import org.nlpcn.commons.lang.tire.domain.Value;
11 | import org.nlpcn.commons.lang.util.IOUtil;
12 |
13 | public class Library {
14 |
15 | public static Forest makeForest(String path) throws Exception {
16 | return makeForest(new FileInputStream(path));
17 | }
18 |
19 | public static Forest makeForest(String path, String encoding) throws Exception {
20 | return makeForest(new FileInputStream(path), encoding);
21 | }
22 |
23 | public static Forest makeForest(InputStream inputStream) throws Exception {
24 | return makeForest(IOUtil.getReader(inputStream, "UTF-8"));
25 | }
26 |
27 | public static Forest makeForest(InputStream inputStream, String encoding) throws Exception {
28 | return makeForest(IOUtil.getReader(inputStream, encoding));
29 | }
30 |
31 | public static Forest makeForest(BufferedReader br) throws Exception {
32 | return makeLibrary(br, new Forest());
33 | }
34 |
35 | /**
36 | * 传入value数组.构造树
37 | *
38 | * @param values
39 | * @return
40 | */
41 | public static Forest makeForest(List values) {
42 | Forest forest = new Forest();
43 | for (Value value : values) {
44 | insertWord(forest, value.toString());
45 | }
46 | return forest;
47 | }
48 |
49 | /**
50 | * 词典树的构造方法
51 | *
52 | * @param br
53 | * @param forest
54 | * @return
55 | * @throws Exception
56 | */
57 | private static Forest makeLibrary(BufferedReader br, Forest forest) throws Exception {
58 | if (br == null)
59 | return forest;
60 | try {
61 | String temp = null;
62 | while ((temp = br.readLine()) != null) {
63 | insertWord(forest, temp);
64 | }
65 | } catch (Exception e) {
66 | e.printStackTrace();
67 | } finally {
68 | br.close();
69 | }
70 | return forest;
71 | }
72 |
73 | public static void insertWord(Forest forest, Value value) {
74 | insertWord(forest, value.getKeyword(), value.getParamers());
75 | }
76 |
77 | /**
78 | * 插入一个词
79 | *
80 | * @param forest
81 | * @param temp
82 | */
83 | public static void insertWord(Forest forest, String temp) {
84 | String[] param = temp.split("\t");
85 |
86 | temp = param[0];
87 |
88 | String[] resultParams = new String[param.length - 1];
89 | for (int j = 1; j < param.length; j++) {
90 | resultParams[j - 1] = param[j];
91 | }
92 |
93 | insertWord(forest, temp, resultParams);
94 | }
95 |
96 | private static void insertWord(Forest forest, String temp, String... param) {
97 | SmartForest branch = forest;
98 | char[] chars = temp.toCharArray();
99 | for (int i = 0; i < chars.length; i++) {
100 | if (chars.length == i + 1) {
101 | branch.add(new Forest(chars[i], 3, param));
102 | } else {
103 | branch.add(new Forest(chars[i], 1, null));
104 | }
105 | branch = branch.getBranch(chars[i]);
106 | }
107 | }
108 |
109 | /**
110 | * 删除一个词
111 | *
112 | * @param forest
113 | * @param word
114 | */
115 | public static void removeWord(Forest forest, String word) {
116 | SmartForest branch = forest;
117 | char[] chars = word.toCharArray();
118 |
119 | for (int i = 0; i < chars.length; i++) {
120 | if (branch == null)
121 | return;
122 | if (chars.length == i + 1) {
123 | branch.add(new Forest(chars[i], -1, null));
124 | }
125 | branch = branch.getBranch(chars[i]);
126 | }
127 | }
128 | }
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/AnsjArrays.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.lang.reflect.Array;
4 | import java.util.Arrays;
5 |
6 | import org.nlpcn.commons.lang.tire.domain.SmartForest;
7 |
8 | @SuppressWarnings("all")
9 | public class AnsjArrays {
10 | private static final int INSERTIONSORT_THRESHOLD = 7;
11 |
12 | /**
13 | * 二分法查找.摘抄了jdk的东西..只不过把他的自动装箱功能给去掉了
14 | *
15 | * @param branches
16 | * branches
17 | * @param c
18 | * char
19 | * @return idx
20 | */
21 |
22 | public static > int binarySearch(T[] branches, char c) {
23 | int high = branches.length - 1;
24 | if (branches.length < 1) {
25 | return high;
26 | }
27 | int low = 0;
28 | while (low <= high) {
29 | int mid = (low + high) >>> 1;
30 | int cmp = branches[mid].compareTo(c);
31 |
32 | if (cmp < 0)
33 | low = mid + 1;
34 | else if (cmp > 0)
35 | high = mid - 1;
36 | else
37 | return mid; // key found
38 | }
39 | return -(low + 1); // key not found.
40 | }
41 |
42 | public static void main(String[] args) {
43 |
44 | int[] chars = { 1, 2, 3, 4, 5, 6, 8, 7 };
45 | chars = Arrays.copyOf(chars, 100);
46 | System.out.println(chars.length);
47 | for (int i = 0; i < chars.length; i++) {
48 | System.out.println(chars[i]);
49 | }
50 | }
51 |
52 | public static void sort(SmartForest[] a) {
53 | SmartForest[] aux = a.clone();
54 | mergeSort(aux, a, 0, a.length, 0);
55 | }
56 |
57 | public static void sort(SmartForest[] a, int fromIndex, int toIndex) {
58 | rangeCheck(a.length, fromIndex, toIndex);
59 | SmartForest[] aux = copyOfRange(a, fromIndex, toIndex);
60 | mergeSort(aux, a, fromIndex, toIndex, -fromIndex);
61 | }
62 |
63 | private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
64 | if (fromIndex > toIndex)
65 | throw new IllegalArgumentException("fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
66 | if (fromIndex < 0)
67 | throw new ArrayIndexOutOfBoundsException(fromIndex);
68 | if (toIndex > arrayLen)
69 | throw new ArrayIndexOutOfBoundsException(toIndex);
70 | }
71 |
72 | private static void mergeSort(SmartForest[] src, SmartForest[] dest, int low, int high, int off) {
73 | int length = high - low;
74 |
75 | // Insertion sort on smallest arrays
76 | if (length < INSERTIONSORT_THRESHOLD) {
77 | for (int i = low; i < high; i++)
78 | for (int j = i; j > low && (dest[j - 1]).compareTo(dest[j].getC()) > 0; j--)
79 | swap(dest, j, j - 1);
80 | return;
81 | }
82 |
83 | // Recursively sort halves of dest into src
84 | int destLow = low;
85 | int destHigh = high;
86 | low += off;
87 | high += off;
88 | int mid = (low + high) >>> 1;
89 | mergeSort(dest, src, low, mid, -off);
90 | mergeSort(dest, src, mid, high, -off);
91 |
92 | // If list is already sorted, just copy from src to dest. This is an
93 | // optimization that results in faster sorts for nearly ordered lists.
94 | if (src[mid - 1].compareTo(src[mid].getC()) <= 0) {
95 | System.arraycopy(src, low, dest, destLow, length);
96 | return;
97 | }
98 |
99 | // Merge sorted halves (now in src) into dest
100 | for (int i = destLow, p = low, q = mid; i < destHigh; i++) {
101 | if (q >= high || p < mid && src[p].compareTo(src[q].getC()) <= 0)
102 | dest[i] = src[p++];
103 | else
104 | dest[i] = src[q++];
105 | }
106 | }
107 |
108 | /**
109 | * Swaps x[a] with x[b].
110 | */
111 | private static void swap(SmartForest[] x, int a, int b) {
112 | SmartForest t = x[a];
113 | x[a] = x[b];
114 | x[b] = t;
115 | }
116 |
117 | @SuppressWarnings("unchecked")
118 | public static T[] copyOfRange(T[] original, int from, int to) {
119 | return copyOfRange(original, from, to, (Class) original.getClass());
120 | }
121 |
122 | public static T[] copyOfRange(U[] original, int from, int to, Class extends T[]> newType) {
123 | int newLength = to - from;
124 | if (newLength < 0)
125 | throw new IllegalArgumentException(from + " > " + to);
126 | @SuppressWarnings("unchecked")
127 | T[] copy = ((Object) newType == (Object) Object[].class) ? (T[]) new Object[newLength] : (T[]) Array.newInstance(newType.getComponentType(), newLength);
128 | System.arraycopy(original, from, copy, 0, Math.min(original.length - from, newLength));
129 | return copy;
130 | }
131 | }
132 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/CollectionUtil.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.util.*;
4 |
/** Small collection helpers. */
public class CollectionUtil {
    /**
     * Returns the entries of {@code map} sorted by value.
     *
     * @param map  the map whose entries are sorted; its values must implement
     *             {@link Comparable}, otherwise a {@link ClassCastException} is thrown
     * @param sort a positive value sorts in descending order, a negative value in
     *             ascending order; {@code 0} makes every pair compare as equal
     * @return a new list holding the map's entries in the requested order
     */
    public static <K, V> List<Map.Entry<K, V>> sortMapByValue(Map<K, V> map, final int sort) {
        List<Map.Entry<K, V>> orderList = new ArrayList<Map.Entry<K, V>>(map.entrySet());
        Collections.sort(orderList, new Comparator<Map.Entry<K, V>>() {
            @Override
            @SuppressWarnings("unchecked")
            public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) {
                int cmp = ((Comparable<V>) o2.getValue()).compareTo(o1.getValue());
                // Reduce to -1/0/1 first: multiplying a raw Integer.MIN_VALUE result
                // by a negative factor would overflow and keep the wrong sign.
                return Integer.signum(cmp) * sort;
            }
        });
        return orderList;
    }

    /**
     * Creates a mutable {@link HashMap} holding the single mapping {@code k1 -> v1}.
     */
    public static <K, V> Map<K, V> as(K k1, V v1) {
        Map<K, V> result = new HashMap<K, V>();
        result.put(k1, v1);
        return result;
    }

}
32 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/FileFinder.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.io.File;
4 | import java.security.AccessControlException;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | import org.nlpcn.commons.lang.util.logging.Log;
9 | import org.nlpcn.commons.lang.util.logging.LogFactory;
10 |
11 | /**
12 | * 从系统各个环境中找文件.或者文件夹
13 | *
14 | * @author ansj
15 | */
16 | public class FileFinder {
17 |
18 | private static final Log LOG = LogFactory.getLog();
19 |
20 | /**
21 | * 系统路径分隔符
22 | */
23 | private static final String SEPARATOR = System.getProperty("path.separator");
24 | private static final String[] PATHS_PROPERTIES = new String[] { "java.class.path", "java.library.path" };
25 |
26 | public static List fileDir = new ArrayList();
27 |
28 | static {
29 | fileDir.add(new File("").getAbsoluteFile());
30 | }
31 |
32 | /**
33 | * 输入一个文件名或者文件的最后路径寻找文件 default deep Integer.max
34 | *
35 | * @param
36 | * @return
37 | */
38 | public static File find(String lastPath) {
39 | return find(lastPath, Integer.MAX_VALUE);
40 | }
41 |
42 | /**
43 | * 输入一个文件名或者文件的最后路径寻找文件
44 | *
45 | * @param
46 | * @return
47 | */
48 | public static File find(String lastPath, int deep) {
49 |
50 | // 先深度查找
51 | for (File file : fileDir) {
52 | if (file.exists() && file.canRead()) {
53 | file = findByFile(file, lastPath, deep);
54 | if (file != null) {
55 | return file;
56 | }
57 | }
58 | }
59 | // 再从基本几个目录中查找
60 | for (String pathProperties : PATHS_PROPERTIES) {
61 | String[] propertyPath = System.getProperty(pathProperties).split(SEPARATOR);
62 | for (String path : propertyPath) {
63 | File file = new File(path);
64 | try {
65 | if (file.canRead() && file.exists()) {
66 | file = findByFile(file, lastPath, deep);
67 | if (file != null) {
68 | return file;
69 | }
70 | }
71 | } catch (AccessControlException e) {
72 | LOG.info(path + " not access to visit");
73 | }
74 | }
75 | }
76 | return null;
77 | }
78 |
79 | /**
80 | * 根据一个文件深度查找
81 | *
82 | * @param file
83 | * @param lastPath
84 | * @param deep integer.max
85 | * @return
86 | */
87 | public static File findByFile(File file, String lastPath) {
88 | return findByFile(file, lastPath, Integer.MAX_VALUE);
89 | }
90 |
91 | /**
92 | * 根据一个文件深度查找
93 | *
94 | * @param file
95 | * @param lastPath
96 | * @param deep
97 | * @return
98 | */
99 | public static File findByFile(File file, String lastPath, int deep) {
100 | if (deep == 0 || !file.exists() || !file.canRead()) {
101 | return null;
102 | }
103 | if (file.getAbsolutePath().endsWith(lastPath)) {
104 | return file;
105 | }
106 | if (file.isDirectory()) {
107 |
108 | File[] listFiles = file.listFiles();
109 | if (listFiles != null && listFiles.length > 0) {
110 | for (File file2 : listFiles) {
111 | File temp = findByFile(file2, lastPath, deep - 1);
112 | if (temp != null) {
113 | return temp;
114 | }
115 | }
116 | }
117 | }
118 | return null;
119 | }
120 |
121 | }
122 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/FileIterator.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.Closeable;
5 | import java.io.FileNotFoundException;
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.io.UnsupportedEncodingException;
9 | import java.util.Iterator;
10 |
11 | /**
12 | * 文件迭代器
13 | *
14 | * @author ansj
15 | */
16 | public class FileIterator implements Iterator, Closeable {
17 | String temp = null;
18 | private BufferedReader br = null;
19 |
20 | protected FileIterator(String path, String charEncoding) throws UnsupportedEncodingException, FileNotFoundException {
21 | br = IOUtil.getReader(path, charEncoding);
22 | }
23 |
24 | protected FileIterator(InputStream is, String charEncoding) throws UnsupportedEncodingException, FileNotFoundException {
25 | br = IOUtil.getReader(is, charEncoding);
26 | }
27 |
28 | @Override
29 | public boolean hasNext() {
30 | if (temp == null) {
31 | try {
32 | temp = br.readLine();
33 | } catch (IOException e) {
34 | // TODO Auto-generated catch block
35 | e.printStackTrace();
36 | }
37 | if (temp == null) {
38 | return false;
39 | } else {
40 | return true;
41 | }
42 | } else {
43 | return true;
44 | }
45 | }
46 |
47 | public String readLine() {
48 | try {
49 | if (temp == null) {
50 | temp = br.readLine();
51 | }
52 | return temp;
53 | } catch (IOException e) {
54 | // TODO Auto-generated catch block
55 | e.printStackTrace();
56 | return null;
57 | } finally {
58 | temp = null;
59 | }
60 | }
61 |
62 | @Override
63 | public void close() {
64 | if (br != null)
65 | try {
66 | br.close();
67 | } catch (IOException e) {
68 | e.printStackTrace();
69 | }
70 | }
71 |
72 | @Override
73 | public String next() {
74 | return readLine();
75 | }
76 |
77 | @Override
78 | public void remove() {
79 | throw new RuntimeException("file iteartor can not remove ");
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/MD5.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.security.MessageDigest;
4 | import java.security.NoSuchAlgorithmException;
5 |
/** MD5 digest helper. */
public class MD5 {
    /**
     * Computes the MD5 digest of a string. MD5 is a hash, not encryption, and is
     * not suitable for protecting passwords or other secrets.
     *
     * @param str the text to hash; it is encoded as UTF-8 so the result does not
     *            depend on the platform default charset
     * @return the digest as 32 lowercase hexadecimal characters
     * @throws NullPointerException  if {@code str} is {@code null}
     * @throws IllegalStateException if the runtime provides no MD5 implementation
     */
    public static String code(String str) {
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] digest = md.digest(str.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            StringBuilder hex = new StringBuilder(digest.length * 2);
            for (byte b : digest) {
                int unsigned = b & 0xff;
                if (unsigned < 16) {
                    // Integer.toHexString drops the leading zero of a one-digit value.
                    hex.append('0');
                }
                hex.append(Integer.toHexString(unsigned));
            }
            return hex.toString();
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 digest is not available", e);
        }
    }
}
38 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/MapCount.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.io.Serializable;
4 | import java.util.Collection;
5 | import java.util.HashMap;
6 | import java.util.Iterator;
7 | import java.util.Map;
8 | import java.util.Map.Entry;
9 |
/**
 * A counter backed by a map: each key accumulates a {@code double} total.
 *
 * @param <T> the key type
 * @author ansj
 */
public class MapCount<T> implements Serializable {
    private static final long serialVersionUID = 1L;
    private HashMap<T, Double> hm = null;

    /** Creates an empty counter. */
    public MapCount() {
        hm = new HashMap<T, Double>();
    }

    /**
     * Creates a counter that uses {@code hm} directly as its storage (no copy is made).
     */
    public MapCount(HashMap<T, Double> hm) {
        this.hm = hm;
    }

    /** Creates an empty counter whose backing map has the given initial capacity. */
    public MapCount(int initialCapacity) {
        hm = new HashMap<T, Double>(initialCapacity);
    }

    /**
     * Adds {@code n} to the total of {@code t}, starting from {@code n} if the key is new.
     *
     * @param t the key
     * @param n the amount to add
     */
    public void add(T t, double n) {
        Double current = hm.get(t);
        if (current != null) {
            hm.put(t, current + n);
        } else {
            hm.put(t, Double.valueOf(n));
        }
    }

    /**
     * Integer overload kept for compatibility with the older API.
     *
     * @param t the key
     * @param n the amount to add
     */
    public void add(T t, int n) {
        add(t, (double) n);
    }

    /**
     * Adds 1 to the total of {@code t}.
     *
     * @param t the key
     */
    public void add(T t) {
        this.add(t, 1);
    }

    /**
     * @return the number of distinct keys
     */
    public int size() {
        return hm.size();
    }

    /**
     * Removes a key together with its total.
     *
     * @param t the key
     */
    public void remove(T t) {
        hm.remove(t);
    }

    /**
     * @return the backing map itself, not a copy
     */
    public HashMap<T, Double> get() {
        return this.hm;
    }

    /**
     * Renders the counter in dictionary format: one {@code key<TAB>value} line per
     * entry, each terminated by {@code \n}.
     *
     * @return the rendered text; empty if there are no entries
     */
    public String getDic() {
        StringBuilder sb = new StringBuilder();
        for (Entry<T, Double> entry : this.hm.entrySet()) {
            sb.append(entry.getKey());
            sb.append("\t");
            sb.append(entry.getValue());
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Adds 1 for every element of the collection.
     *
     * @param collection the keys to count
     */
    public void addAll(Collection<? extends T> collection) {
        for (T t : collection) {
            this.add(t);
        }
    }

    /**
     * Adds {@code weight} for every element of the collection.
     *
     * @param collection the keys to count
     * @param weight     the amount added per element
     */
    public void addAll(Collection<? extends T> collection, double weight) {
        for (T t : collection) {
            this.add(t, weight);
        }
    }

    /**
     * Adds every value of {@code map} to the total of its key.
     *
     * @param map key/amount pairs to merge in
     */
    public void addAll(Map<? extends T, Double> map) {
        for (Entry<? extends T, Double> e : map.entrySet()) {
            this.add(e.getKey(), e.getValue());
        }
    }
}
145 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/MapFactory.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 | import java.util.TreeMap;
6 |
/**
 * Fluent builder for maps: pick the implementation with {@link #hashMap()} or
 * {@link #treeMap()}, chain {@link #a(Object, Object)} calls, and finish with
 * {@link #toMap()}.
 *
 * @author ansj
 *
 * @param <K> key type
 * @param <V> value type
 */
public class MapFactory<K, V> {

    private Map<K, V> map = null;

    private MapFactory() {
    }

    /** Starts a builder backed by a {@link HashMap}. */
    public static <K, V> MapFactory<K, V> hashMap() {
        MapFactory<K, V> mf = new MapFactory<K, V>();
        mf.map = new HashMap<K, V>();
        return mf;
    }

    /** Starts a builder backed by a {@link TreeMap} with natural key ordering. */
    public static <K, V> MapFactory<K, V> treeMap() {
        MapFactory<K, V> mf = new MapFactory<K, V>();
        mf.map = new TreeMap<K, V>();
        return mf;
    }

    /**
     * Puts one mapping into the map being built.
     *
     * @return this builder, for chaining
     */
    public MapFactory<K, V> a(K k, V v) {
        map.put(k, v);
        return this;
    }

    /**
     * @return the map being built (the live instance, not a copy)
     */
    public Map<K, V> toMap() {
        return map;
    }
}
43 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/MurmurHash.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
/**
 * murmur hash 2.0.
 *
 * The murmur hash is a relatively fast hash function for platforms with
 * efficient multiplication.
 *
 * This is a re-implementation of the original C code plus some
 * additional features.
 *
 * Public domain.
 *
 * @author Viliam Holub
 * @version 1.0.2
 *
 */
public final class MurmurHash {

    // all methods static; private constructor.
    private MurmurHash() {}

    /**
     * Generates 32 bit hash from byte array of the given length and
     * seed.
     *
     * @param data byte array to hash
     * @param length length of the array to hash
     * @param seed initial seed value
     * @return 32 bit hash of the given array
     */
    public static int hash32(final byte[] data, int length, int seed) {
        // 'm' and 'r' are mixing constants generated offline.
        // They're not really 'magic', they just happen to work well.
        final int m = 0x5bd1e995;
        final int r = 24;

        // Initialize the hash to a random value
        int h = seed ^ length;
        int length4 = length / 4;

        // Mix four bytes at a time, read as a little-endian int.
        for (int i = 0; i < length4; i++) {
            final int i4 = i * 4;
            int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8)
                    + ((data[i4 + 2] & 0xff) << 16) + ((data[i4 + 3] & 0xff) << 24);
            k *= m;
            k ^= k >>> r;
            k *= m;
            h *= m;
            h ^= k;
        }

        // Handle the last few bytes of the input array.
        // The cases fall through on purpose: every remaining byte is mixed in.
        switch (length % 4) {
        case 3: h ^= (data[(length & ~3) + 2] & 0xff) << 16;
        case 2: h ^= (data[(length & ~3) + 1] & 0xff) << 8;
        case 1: h ^= (data[length & ~3] & 0xff);
                h *= m;
        }

        h ^= h >>> 13;
        h *= m;
        h ^= h >>> 15;

        return h;
    }

    /**
     * Generates 32 bit hash from byte array with default seed value.
     *
     * @param data byte array to hash
     * @param length length of the array to hash
     * @return 32 bit hash of the given array
     */
    public static int hash32(final byte[] data, int length) {
        return hash32(data, length, 0x9747b28c);
    }

    /**
     * Generates 32 bit hash from a string. The string is encoded with the platform
     * default charset, so results for non-ASCII text depend on that charset.
     *
     * @param text string to hash
     * @return 32 bit hash of the given string
     */
    public static int hash32(final String text) {
        final byte[] bytes = text.getBytes();
        return hash32(bytes, bytes.length);
    }

    /**
     * Generates 32 bit hash from a substring.
     *
     * @param text string to hash
     * @param from starting index
     * @param length length of the substring to hash
     * @return 32 bit hash of the given string
     */
    public static int hash32(final String text, int from, int length) {
        return hash32(text.substring(from, from + length));
    }

    /**
     * Generates 64 bit hash from byte array of the given length and seed.
     *
     * @param data byte array to hash
     * @param length length of the array to hash
     * @param seed initial seed value
     * @return 64 bit hash of the given array
     */
    public static long hash64(final byte[] data, int length, int seed) {
        final long m = 0xc6a4a7935bd1e995L;
        final int r = 47;

        long h = (seed & 0xffffffffL) ^ (length * m);

        int length8 = length / 8;

        // Mix eight bytes at a time, read as a little-endian long.
        for (int i = 0; i < length8; i++) {
            final int i8 = i * 8;
            long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
                    + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
                    + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
                    + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);

            k *= m;
            k ^= k >>> r;
            k *= m;

            h ^= k;
            h *= m;
        }

        // The cases fall through on purpose: every remaining byte is mixed in.
        switch (length % 8) {
        case 7: h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
        case 6: h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
        case 5: h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
        case 4: h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
        case 3: h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
        case 2: h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
        case 1: h ^= (long) (data[length & ~7] & 0xff);
                h *= m;
        }

        h ^= h >>> r;
        h *= m;
        h ^= h >>> r;

        return h;
    }

    /**
     * Generates 64 bit hash from byte array with default seed value.
     *
     * @param data byte array to hash
     * @param length length of the array to hash
     * @return 64 bit hash of the given string
     */
    public static long hash64(final byte[] data, int length) {
        return hash64(data, length, 0xe17a1465);
    }

    /**
     * Generates 64 bit hash from a string. The string is encoded with the platform
     * default charset, so results for non-ASCII text depend on that charset.
     *
     * @param text string to hash
     * @return 64 bit hash of the given string
     */
    public static long hash64(final String text) {
        final byte[] bytes = text.getBytes();
        return hash64(bytes, bytes.length);
    }

    /**
     * Generates 64 bit hash from a substring.
     *
     * @param text string to hash
     * @param from starting index
     * @param length length of the substring to hash
     * @return 64 bit hash of the given array
     */
    public static long hash64(final String text, int from, int length) {
        return hash64(text.substring(from, from + length));
    }
}
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/ObjConver.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.text.ParseException;
4 | import java.text.SimpleDateFormat;
5 | import java.util.Date;
6 |
7 | public class ObjConver {
8 |
9 | public static final String DEFFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
10 |
11 | public static Double getDouble(String value) {
12 | return castToDouble(value);
13 | }
14 |
15 | public static Double getDoubleValue(String value) {
16 | if (StringUtil.isBlank(value)) {
17 | return 0D;
18 | }
19 | return castToDouble(value);
20 | }
21 |
22 | public static Float getFloat(String value) {
23 | if (StringUtil.isBlank(value)) {
24 | return null;
25 | }
26 | return castToFloat(value);
27 | }
28 |
29 | public static Float getFloatValue(String value) {
30 | return castToFloat(value).floatValue();
31 | }
32 |
33 | public static Integer getInteger(String value) {
34 | return castToInteger(value);
35 | }
36 |
37 | public static int getIntValue(String value) {
38 | if (StringUtil.isBlank(value)) {
39 | return 0;
40 | }
41 | return castToInteger(value);
42 | }
43 |
44 | public static Date getDate(String value) {
45 | if (StringUtil.isBlank(value)) {
46 | return null;
47 | }
48 | return castToDate(value);
49 | }
50 |
51 | public static Long getLong(String value) {
52 | return castToLong(value);
53 | }
54 |
55 | public static long getLongValue(String value) {
56 | if (StringUtil.isBlank(value)) {
57 | return 0L;
58 | }
59 | return castToLong(value);
60 | }
61 |
62 | public static Boolean getBoolean(String value) {
63 | return castToBoolean(value);
64 | }
65 |
66 | public static boolean getBooleanValue(String value) {
67 | if (StringUtil.isBlank(value)) {
68 | return false;
69 | }
70 | return castToBoolean(value);
71 |
72 | }
73 |
74 | public static final Float castToFloat(Object value) {
75 | if (value == null) {
76 | return null;
77 | }
78 |
79 | if (value instanceof Number) {
80 | return ((Number) value).floatValue();
81 | }
82 |
83 | if (value instanceof String) {
84 | String strVal = value.toString();
85 | if (strVal.length() == 0) {
86 | return null;
87 | }
88 |
89 | return Float.parseFloat(strVal);
90 | }
91 |
92 | throw new ClassCastException("can not cast to float, value : " + value);
93 | }
94 |
95 | public static final Double castToDouble(Object value) {
96 | if (value == null) {
97 | return null;
98 | }
99 | if (value instanceof Number) {
100 | return ((Number) value).doubleValue();
101 | } else if (value instanceof String) {
102 | String strVal = value.toString();
103 | if (strVal.length() == 0) {
104 | return null;
105 | }
106 | return Double.parseDouble(strVal);
107 | }
108 | throw new RuntimeException("can not cast to double, value : " + value);
109 | }
110 |
111 | public static final Date castToDate(Object value) {
112 | if (value == null) {
113 | return null;
114 | }
115 | long longValue = -1;
116 |
117 | if(value instanceof Date){
118 | return (Date) value ;
119 | }else if (value instanceof Number) {
120 | longValue = ((Number) value).longValue();
121 | } else if (value instanceof String) {
122 | String strVal = (String) value;
123 |
124 | if (strVal.indexOf('-') != -1) {
125 | String format = null;
126 | if (strVal.length() == DEFFAULT_DATE_FORMAT.length()) {
127 | format = DEFFAULT_DATE_FORMAT;
128 | } else if (strVal.length() == 10) {
129 | format = "yyyy-MM-dd";
130 | } else if (strVal.length() == "yyyy-MM-dd HH".length()) {
131 | format = "yyyy-MM-dd HH";
132 | } else if (strVal.length() == "yyyy-MM-dd HH:mm".length()) {
133 | format = "yyyy-MM-dd HH:mm";
134 | } else if (strVal.length() == "yyyy-MM-dd HH:mm:ss".length()) {
135 | format = "yyyy-MM-dd HH:mm:ss";
136 | } else if (strVal.length() == "yyyy-MM-dd HH:mm:SSS".length()) {
137 | format = "yyyy-MM-dd HH:mm:ss.SSS";
138 | } else {
139 | return null;
140 | }
141 |
142 | SimpleDateFormat dateFormat = new SimpleDateFormat(format);
143 | try {
144 | return dateFormat.parse(strVal);
145 | } catch (ParseException e) {
146 | throw new RuntimeException("can not cast to Date, value : " + strVal);
147 | }
148 | }
149 |
150 | if (strVal.length() == 0) {
151 | return null;
152 | }
153 |
154 | longValue = Long.parseLong(strVal);
155 | }
156 |
157 | if (longValue < 0) {
158 | throw new ClassCastException("can not cast to Date, value : " + value);
159 | }
160 |
161 | return new Date(longValue);
162 | }
163 |
164 | public static final Long castToLong(Object value) {
165 | if (value == null) {
166 | return null;
167 | }
168 |
169 | if (value instanceof Number) {
170 | return ((Number) value).longValue();
171 | }
172 |
173 | if (value instanceof String) {
174 | String strVal = (String) value;
175 | if (strVal.length() == 0) {
176 | return null;
177 | }
178 |
179 | try {
180 | return Long.parseLong(strVal);
181 | } catch (NumberFormatException ex) {
182 | }
183 |
184 | Date date = castToDate(strVal);
185 |
186 | if (date != null) {
187 | return date.getTime();
188 | }
189 | }
190 |
191 | throw new ClassCastException("can not cast to long, value : " + value);
192 | }
193 |
194 | public static final Integer castToInteger(Object value) {
195 | if (value == null) {
196 | return null;
197 | }
198 |
199 | if (value instanceof Integer) {
200 | return (Integer) value;
201 | }
202 |
203 | if (value instanceof Number) {
204 | return ((Number) value).intValue();
205 | }
206 |
207 | if (value instanceof String) {
208 | String strVal = (String) value;
209 | if (strVal.length() == 0) {
210 | return null;
211 | }
212 |
213 | return Integer.parseInt(strVal);
214 | }
215 |
216 | throw new ClassCastException("can not cast to int, value : " + value);
217 | }
218 |
219 | public static final Boolean castToBoolean(Object value) {
220 | if (value == null) {
221 | return null;
222 | }
223 |
224 | if (value instanceof Boolean) {
225 | return (Boolean) value;
226 | }
227 |
228 | if (value instanceof Number) {
229 | return ((Number) value).intValue() == 1;
230 | }
231 |
232 | if (value instanceof String) {
233 | String str = (String) value;
234 | if (str.length() == 0) {
235 | return null;
236 | }
237 |
238 | if ("true".equalsIgnoreCase(str)) {
239 | return Boolean.TRUE;
240 | }
241 | if ("false".equalsIgnoreCase(str)) {
242 | return Boolean.FALSE;
243 | }
244 |
245 | if ("1".equalsIgnoreCase(str)) {
246 | return Boolean.TRUE;
247 | }
248 | }
249 |
250 | throw new ClassCastException("can not cast to int, value : " + value);
251 | }
252 |
253 | private static Character castToCharacter(Object value) {
254 |
255 | if (value instanceof Character) {
256 | return (Character) value;
257 | }
258 |
259 | if (value instanceof Number) {
260 | return (char) ((Number) value).intValue();
261 | }
262 |
263 | if (value != null) {
264 | return value.toString().trim().charAt(0);
265 | }
266 |
267 | return null;
268 | }
269 |
270 | /**
271 | * 将一个对象转换为对应的类
272 | *
273 | * @param
274 | *
275 | * @param
276 | *
277 | * @param
278 | * @param value
279 | * @param c
280 | * @return
281 | */
282 | @SuppressWarnings("unchecked")
283 | public static T conversion(Object value, Class c) {
284 | if (String.class.equals(c)) {
285 | return (T) value;
286 | } else if (Character.class.equals(c)) {
287 | return (T) ObjConver.castToCharacter(value);
288 | } else if (Integer.class.equals(c)) {
289 | return (T) ObjConver.castToInteger(value);
290 | } else if (Double.class.equals(c)) {
291 | return (T) ObjConver.castToDouble(value);
292 | } else if (Float.class.equals(c)) {
293 | return (T) ObjConver.castToFloat(value);
294 | } else if (Long.class.equals(c)) {
295 | return (T) ObjConver.castToLong(value);
296 | } else if (Boolean.class.equals(c)) {
297 | return (T) ObjConver.castToBoolean(value);
298 | } else {
299 | throw new RuntimeException("not define this class by " + c);
300 | }
301 | }
302 |
303 | }
304 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/StringUtil.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Arrays;
5 | import java.util.Collection;
6 | import java.util.Iterator;
7 | import java.util.List;
8 | import java.util.regex.Matcher;
9 | import java.util.regex.Pattern;
10 |
11 | public class StringUtil {
12 |
// Quote and separator characters used when building a SQL IN list.
private static final char DY = '\'';
private static final char DH = ',';

private static final String EMPTY = "";
private static final String NULL = "null";

// Per-ASCII-character rules for rmHtmlTag, indexed by character code:
//   -1 drop the character, 0 copy it, 1 collapse runs of it,
//   any other value: the character opens a span closed by filterEnd[c], and the
//   value limits how far ahead the closing character is searched for.
private static int[] filter = new int[128];
private static int[] filterEnd = new int[128];

static {
    // Spans: '<' ... '>' and '&' ... ';'.
    filter['<'] = Integer.MAX_VALUE / 2;
    filterEnd['<'] = '>';
    filter['&'] = 10;
    filterEnd['&'] = ';';

    // Characters that are dropped.
    filter[';'] = -1;
    filter['\n'] = -1;
    filter['\r'] = -1;
    filter['\t'] = -1;

    // Characters whose runs are collapsed.
    filter[' '] = 1;
    filter['*'] = 1;
    filter['-'] = 1;
    filter['.'] = 1;
    filter['#'] = 1;
}
39 |
40 | /**
41 | * 去除html标签
42 | *
43 | * @param input
44 | * @return
45 | */
46 | public static String rmHtmlTag(String input) {
47 | if (isBlank(input)) {
48 | return "";
49 | }
50 | int length = input.length();
51 | int tl = 0;
52 | StringBuilder sb = new StringBuilder();
53 | char c = 0;
54 | for (int i = 0; i < length; i++) {
55 | c = input.charAt(i);
56 |
57 | if (c > 127) {
58 | sb.append(c);
59 | continue;
60 | }
61 |
62 | switch (filter[c]) {
63 | case -1:
64 | break;
65 | case 0:
66 | sb.append(c);
67 | break;
68 | case 1:
69 | if (sb.length() > 0 && sb.charAt(sb.length() - 1) != c)
70 | sb.append(c);
71 | do {
72 | i++;
73 | } while (i < length && input.charAt(i) == c);
74 |
75 | if (i < length || input.charAt(length - 1) != c)
76 | i--;
77 | break;
78 | default:
79 | tl = filter[c] + i;
80 | int tempOff = i;
81 | boolean flag = false;
82 | char end = (char) filterEnd[c];
83 | for (i++; i < length && i < tl; i++) {
84 | c = input.charAt(i);
85 | if (c > 127)
86 | continue;
87 | if (c == end) {
88 | flag = true;
89 | break;
90 | }
91 | }
92 | if (!flag) {
93 | i = tempOff;
94 | sb.append(input.charAt(i));
95 | }
96 | break;
97 | }
98 | }
99 | return sb.toString();
100 | }
101 |
102 | /**
103 | * 判断字符串是否为空
104 | *
105 | * @param cs
106 | * @return
107 | */
108 | public static boolean isBlank(CharSequence cs) {
109 | int strLen;
110 | if (cs == null || (strLen = cs.length()) == 0) {
111 | return true;
112 | }
113 | for (int i = 0; i < strLen; i++) {
114 | if (Character.isWhitespace(cs.charAt(i)) == false) {
115 | return false;
116 | }
117 | }
118 | return true;
119 | }
120 |
121 | /**
122 | * 判断字符串是否不为空
123 | *
124 | * @param cs
125 | * @return
126 | */
127 | public static boolean isNotBlank(CharSequence cs) {
128 | return !isBlank(cs);
129 |
130 | }
131 |
132 | public static String makeSqlInString(String str) {
133 | String[] strs = str.split(",");
134 | StringBuilder sb = new StringBuilder();
135 | String field = null;
136 | for (int i = 0; i < strs.length; i++) {
137 | field = strs[i].trim();
138 | if (isNotBlank(field)) {
139 | sb.append(DY);
140 | sb.append(field);
141 | sb.append(DY);
142 | if (i < strs.length - 1) {
143 | sb.append(DH);
144 | }
145 | }
146 | }
147 | return sb.toString();
148 | }
149 |
150 | /**
151 | * 将一个字符串.转换成排序后的字符数组
152 | *
153 | * @param str
154 | * @return
155 | */
156 | public static char[] sortCharArray(String str) {
157 | char[] chars = str.toCharArray();
158 | Arrays.sort(chars);
159 | return chars;
160 | }
161 |
162 | public static String joiner(int[] ints, String split) {
163 |
164 | if (ints.length == 0) {
165 | return EMPTY;
166 | }
167 |
168 | StringBuilder sb = new StringBuilder(String.valueOf(ints[0]));
169 |
170 | for (int i = 1; i < ints.length; i++) {
171 | sb.append(split);
172 | sb.append(ints[i]);
173 | }
174 |
175 | return sb.toString();
176 | }
177 |
178 | public static String joiner(double[] doubles, String split) {
179 |
180 | if (doubles.length == 0) {
181 | return EMPTY;
182 | }
183 |
184 | StringBuilder sb = new StringBuilder(String.valueOf(doubles[0]));
185 |
186 | for (int i = 1; i < doubles.length; i++) {
187 | sb.append(split);
188 | sb.append(doubles[i]);
189 | }
190 |
191 | return sb.toString();
192 | }
193 |
194 | public static String joiner(float[] floats, String split) {
195 |
196 | if (floats.length == 0) {
197 | return EMPTY;
198 | }
199 |
200 | StringBuilder sb = new StringBuilder(String.valueOf(floats[0]));
201 |
202 | for (int i = 1; i < floats.length; i++) {
203 | sb.append(split);
204 | sb.append(floats[i]);
205 | }
206 |
207 | return sb.toString();
208 | }
209 |
210 | public static String joiner(long[] longs, String split) {
211 |
212 | if (longs.length == 0) {
213 | return EMPTY;
214 | }
215 |
216 | StringBuilder sb = new StringBuilder(String.valueOf(longs[0]));
217 |
218 | for (int i = 1; i < longs.length; i++) {
219 | sb.append(split);
220 | sb.append(longs[i]);
221 | }
222 |
223 | return sb.toString();
224 | }
225 |
226 | public static String toString(Object obj) {
227 | if (obj == null) {
228 | return NULL;
229 | } else {
230 | return obj.toString();
231 | }
232 | }
233 |
234 | public static String joiner(Collection> c, String split) {
235 |
236 | Iterator> iterator = c.iterator();
237 |
238 | if (!iterator.hasNext()) {
239 | return EMPTY;
240 | }
241 |
242 | StringBuilder sb = new StringBuilder(iterator.next().toString());
243 |
244 | while (iterator.hasNext()) {
245 | sb.append(split);
246 | sb.append(toString(iterator.next()).toString());
247 | }
248 |
249 | return sb.toString();
250 | }
251 |
252 | public static boolean isBlank(char[] chars) {
253 | // TODO Auto-generated method stub
254 | int strLen;
255 | if (chars == null || (strLen = chars.length) == 0) {
256 | return true;
257 | }
258 | for (int i = 0; i < strLen; i++) {
259 | if (Character.isWhitespace(chars[i]) == false) {
260 | return false;
261 | }
262 | }
263 | return true;
264 | }
265 |
266 | /**
267 | * 正则匹配第一个
268 | *
269 | * @param regex
270 | * @param input
271 | * @return
272 | */
273 | public static String matcherFirst(String regex, String input) {
274 | Matcher matcher = Pattern.compile(regex).matcher(input); // 读取特征个数
275 | if (matcher.find()) {
276 | return input.substring(matcher.start(), matcher.end());
277 | } else {
278 | return null;
279 | }
280 | }
281 |
282 | /**
283 | * trim 一个字符串.扩展了string类原生的trim.对BOM和中文空格进行trim
284 | *
285 | * @return
286 | */
287 | public static String trim(String value) {
288 |
289 | if (value == null) {
290 | return value;
291 | }
292 |
293 | int len = value.length();
294 |
295 | int st = 0;
296 |
297 | while ((st < len) && (Character.isWhitespace(value.charAt(st)) || value.charAt(st) == 65279 || value.charAt(st) == 160 || value.charAt(st) == 12288)) {
298 | st++;
299 | }
300 | while ((st < len) && (Character.isWhitespace(value.charAt(len - 1)) || value.charAt(st) == 160 || value.charAt(st) == 12288)) {
301 | len--;
302 | }
303 | return ((st > 0) || (len < value.length())) ? value.substring(st, len) : value;
304 | }
305 |
306 | /**
307 | * 正则匹配全部
308 | *
309 | * @param regex
310 | * @param input
311 | * @return
312 | */
313 | public static List matcherAll(String regex, String input) {
314 | List result = new ArrayList();
315 | Matcher matcher = Pattern.compile(regex).matcher(input); // 读取特征个数
316 | while (matcher.find()) {
317 | result.add(input.substring(matcher.start(), matcher.end()));
318 | }
319 | return result;
320 | }
321 |
322 | /**
323 | * 正则匹配全部
324 | *
325 | * @param regex
326 | * @param input
327 | * @return
328 | */
329 | public static String matcherLast(String regex, String input) {
330 | List result = matcherAll(regex, input);
331 | if (result.size() == 0) {
332 | return null;
333 | } else {
334 | return result.get(result.size() - 1);
335 | }
336 | }
337 | }
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/WordAlert.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
/**
 * Character normalisation helpers: full-width Latin letters and ASCII uppercase
 * letters are folded to ASCII lowercase, and full-width digits to ASCII digits.
 */
public class WordAlert {

    /** Code point of the full-width lowercase letter a (U+FF41). */
    public static final int MIN_LOWER = 65345;
    /** Code point of the full-width lowercase letter z (U+FF5A). */
    public static final int MAX_LOWER = 65370;
    /** Subtracted from a full-width lowercase letter to get the ASCII lowercase letter. */
    public static final int LOWER_GAP = 65248;
    /** Code point of the full-width uppercase letter A (U+FF21). */
    public static final int MIN_UPPER = 65313;
    /** Code point of the full-width uppercase letter Z (U+FF3A). */
    public static final int MAX_UPPER = 65338;
    /** Subtracted from a full-width uppercase letter to get the ASCII lowercase letter (65313 - 65216 = 97). */
    public static final int UPPER_GAP = 65216;
    /** Code point of the ASCII uppercase letter A. */
    public static final int MIN_UPPER_E = 65;
    /** Code point of the ASCII uppercase letter Z. */
    public static final int MAX_UPPER_E = 90;
    /** Subtracted from an ASCII uppercase letter to get the ASCII lowercase letter. */
    public static final int UPPER_GAP_E = -32;
    /** Code point of the full-width digit 0 (U+FF10). */
    public static final int MIN_UPPER_N = 65296;
    /** Code point of the full-width digit 9 (U+FF19). */
    public static final int MAX_UPPER_N = 65305;
    /** Subtracted from a full-width digit to get the ASCII digit. */
    public static final int UPPER_GAP_N = 65248;

    /**
     * Lookup table indexed by char value. Entries for characters outside the
     * ranges filled below keep the default value 0.
     */
    private static final char[] CHARCOVER = new char[65536];

    static {
        // The six ranges are disjoint, so the fill order does not matter.
        fillRange(MIN_LOWER, MAX_LOWER, LOWER_GAP);
        fillRange(MIN_UPPER, MAX_UPPER, UPPER_GAP);
        fillRange(MIN_UPPER_E, MAX_UPPER_E, UPPER_GAP_E);
        fillRange(MIN_UPPER_N, MAX_UPPER_N, UPPER_GAP_N);
        fillRange('0', '9', 0);
        fillRange('a', 'z', 0);
    }

    /** Stores {@code code - gap} in the table for every code in {@code [from, to]}. */
    private static void fillRange(int from, int to, int gap) {
        for (int code = from; code <= to; code++) {
            CHARCOVER[code] = (char) (code - gap);
        }
    }

    /** Folds a full-width letter or an ASCII uppercase letter to ASCII lowercase; other chars are returned unchanged. */
    private static char foldLetter(char ch) {
        if (ch >= MIN_LOWER && ch <= MAX_LOWER) {
            return (char) (ch - LOWER_GAP);
        }
        if (ch >= MIN_UPPER && ch <= MAX_UPPER) {
            return (char) (ch - UPPER_GAP);
        }
        if (ch >= MIN_UPPER_E && ch <= MAX_UPPER_E) {
            return (char) (ch - UPPER_GAP_E);
        }
        return ch;
    }

    /** Folds a full-width digit to the ASCII digit; other chars are returned unchanged. */
    private static char foldDigit(char ch) {
        if (ch >= MIN_UPPER_N && ch <= MAX_UPPER_N) {
            return (char) (ch - UPPER_GAP_N);
        }
        return ch;
    }

    /**
     * Folds letters of a slice of {@code chars} to ASCII lowercase, writing the
     * result back into the array.
     *
     * @param chars the array to normalise in place
     * @param start index of the first char of the slice
     * @param end   number of chars in the slice (a length, not an end index)
     * @return a new string holding the normalised slice
     */
    public static String alertEnglish(char[] chars, int start, int end) {
        final int stop = start + end;
        for (int idx = start; idx < stop; idx++) {
            chars[idx] = foldLetter(chars[idx]);
        }
        return new String(chars, start, end);
    }

    /**
     * Folds letters of a slice of {@code temp} to ASCII lowercase.
     *
     * @param temp  the source text
     * @param start index of the first char of the slice
     * @param end   number of chars in the slice (a length, not an end index)
     * @return the normalised slice
     */
    public static String alertEnglish(String temp, int start, int end) {
        StringBuilder folded = new StringBuilder();
        final int stop = start + end;
        for (int idx = start; idx < stop; idx++) {
            folded.append(foldLetter(temp.charAt(idx)));
        }
        return folded.toString();
    }

    /**
     * Folds full-width digits of a slice of {@code chars} to ASCII digits,
     * writing the result back into the array.
     *
     * @param chars the array to normalise in place
     * @param start index of the first char of the slice
     * @param end   number of chars in the slice (a length, not an end index)
     * @return a new string holding the normalised slice
     */
    public static String alertNumber(char[] chars, int start, int end) {
        final int stop = start + end;
        for (int idx = start; idx < stop; idx++) {
            chars[idx] = foldDigit(chars[idx]);
        }
        return new String(chars, start, end);
    }

    /**
     * Folds full-width digits of a slice of {@code temp} to ASCII digits.
     *
     * @param temp  the source text
     * @param start index of the first char of the slice
     * @param end   number of chars in the slice (a length, not an end index)
     * @return the normalised slice
     */
    public static String alertNumber(String temp, int start, int end) {
        StringBuilder folded = new StringBuilder();
        final int stop = start + end;
        for (int idx = start; idx < stop; idx++) {
            folded.append(foldDigit(temp.charAt(idx)));
        }
        return folded.toString();
    }

    /**
     * Normalises a whole string through the lookup table; characters without a
     * table entry are copied unchanged.
     *
     * @param str the text to normalise
     * @return a new char array of the same length as {@code str}
     */
    public static char[] alertStr(String str) {
        char[] normalized = str.toCharArray();
        for (int idx = 0; idx < normalized.length; idx++) {
            char mapped = CHARCOVER[normalized[idx]];
            if (mapped > 0) {
                normalized[idx] = mapped;
            }
        }
        return normalized;
    }

    /**
     * Tells whether every char of {@code word} is an ASCII letter or a
     * full-width Latin letter. An empty string yields {@code true}.
     *
     * @param word the text to test
     * @return {@code false} as soon as a char outside those ranges is found
     */
    public static boolean isEnglish(String word) {
        for (int idx = 0, n = word.length(); idx < n; idx++) {
            char ch = word.charAt(idx);
            boolean letter = (ch >= 'a' && ch <= 'z')
                    || (ch >= MIN_LOWER && ch <= MAX_LOWER)
                    || (ch >= MIN_UPPER && ch <= MAX_UPPER)
                    || (ch >= MIN_UPPER_E && ch <= MAX_UPPER_E);
            if (!letter) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether every char of {@code word} is an ASCII digit, a full-width
     * digit or a dot. An empty string yields {@code true}.
     *
     * @param word the text to test
     * @return {@code false} as soon as any other char is found
     */
    public static boolean isNumber(String word) {
        for (int idx = 0, n = word.length(); idx < n; idx++) {
            char ch = word.charAt(idx);
            boolean numeric = (ch >= '0' && ch <= '9')
                    || (ch >= MIN_UPPER_N && ch <= MAX_UPPER_N)
                    || ch == '.';
            if (!numeric) {
                return false;
            }
        }
        return true;
    }

    /**
     * Looks a single char up in the normalisation table.
     *
     * @param c the char to normalise
     * @return the table entry, which is the char with value 0 when {@code c}
     *         has no entry
     */
    public static char CharCover(char c) {
        return CHARCOVER[c];
    }

}
216 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/WordWeight.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util;
2 |
3 | import java.util.HashMap;
4 | import java.util.List;
5 | import java.util.Map;
6 | import java.util.Map.Entry;
7 | import java.util.Set;
8 |
9 | /**
10 | * 计算词语的权重,词频统计等
11 | *
12 | * @author ansj
13 | *
14 | */
15 | public class WordWeight {
16 |
17 | private MapCount mc = new MapCount(); // 词频统计
18 |
19 | private HashMap> x2mat = new HashMap>();
20 |
21 | private MapCount x2mc = new MapCount();
22 |
23 | private Integer maxCount;
24 |
25 | private Integer recyclingCount;
26 |
27 | private double allFreq;
28 |
29 | public WordWeight() {
30 | };
31 |
32 | /**
33 | * 新的个数 = maxCount - recyclingCount; recyclingCount< maxCount
34 | *
35 | * @param maxCount
36 | * 最大值,当超过这个值后进行回收
37 | * @param recyclingCount
38 | * 回收个数
39 | */
40 | public WordWeight(Integer maxCount, Integer recyclingCount) {
41 | this.maxCount = maxCount;
42 | this.recyclingCount = recyclingCount;
43 | }
44 |
45 | public void add(String word) {
46 | add(word, 1);
47 | }
48 |
49 | public void add(String word, double weight) {
50 | allFreq += weight;
51 | mc.add(word, weight);
52 | if (maxCount != null && recyclingCount != null && mc.get().size() >= maxCount) {
53 | recycling();
54 | }
55 | }
56 |
57 | public void add(String word, String target) {
58 | add(word, target, 1);
59 | }
60 |
61 | public void add(String word, String target, double weight) {
62 | if (x2mat.containsKey(target)) {
63 | x2mat.get(target).add(word, weight);
64 | } else {
65 | x2mat.put(target, new MapCount());
66 | x2mat.get(target).add(word, weight);
67 | }
68 | x2mc.add(target, 1);
69 | add(word, weight);
70 | }
71 |
72 | /**
73 | * 导出词频统计结果
74 | *
75 | * @return
76 | */
77 | public Map export() {
78 | Map result = new HashMap();
79 | result.putAll(mc.get());
80 | return result;
81 | }
82 |
83 | /**
84 | * 导出IDF统计结果
85 | *
86 | * @return
87 | */
88 | public Map exportIDF() {
89 |
90 | Map result = new HashMap();
91 |
92 | for (Entry entry : mc.get().entrySet()) {
93 | result.put(entry.getKey(), Math.log(allFreq / entry.getValue()));
94 | }
95 |
96 | return result;
97 | }
98 |
99 | public HashMap> exportChiSquare() {
100 |
101 | HashMap> x2final = new HashMap>();
102 |
103 | double sum = allFreq;
104 |
105 | Double a, b, c, d;
106 |
107 | for (Entry> iter1 : x2mat.entrySet()) {
108 | String target = iter1.getKey();
109 | for (Entry iter2 : iter1.getValue().get().entrySet()) {
110 | String name = iter2.getKey();
111 | a = iter2.getValue();
112 | b = x2mc.get().get(target) - a;
113 | c = mc.get().get(name) - a;
114 | d = sum - b - c + a;
115 | Double x2stat = Math.pow(a * d - b * c, 2) / (a + c) / (b + d);
116 | if (x2final.get(target) != null) {
117 | x2final.get(target).add(name, x2stat);
118 | } else {
119 | x2final.put(target, new MapCount());
120 | x2final.get(target).add(name, x2stat);
121 | }
122 | }
123 | }
124 |
125 | return x2final;
126 |
127 | }
128 |
129 | /**
130 | * 回收
131 | */
132 | private void recycling() {
133 | List> list = CollectionUtil.sortMapByValue(mc.get(), -1);
134 | Set targetSet = x2mat.keySet();
135 | String word;
136 | for (int i = 0; i < recyclingCount; i++) {
137 | word = list.get(i).getKey();
138 | allFreq -= mc.get().remove(word); // 从全局中移除数字
139 | for (String target : targetSet) {
140 | Double r2 = x2mat.get(target).get().remove(word);
141 | if (r2 != null) {
142 | x2mc.add(target, -r2);
143 | }
144 | }
145 | }
146 | }
147 |
148 | }
149 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/JakartaCommonsLoggingImpl.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.nlpcn.commons.lang.util.logging;
17 |
18 | import org.apache.commons.logging.Log;
19 | import org.apache.commons.logging.LogFactory;
20 |
21 | public class JakartaCommonsLoggingImpl implements org.nlpcn.commons.lang.util.logging.Log {
22 |
23 | private Log log;
24 |
25 | /**
26 | * @since 0.2.1
27 | * @param log
28 | */
29 | public JakartaCommonsLoggingImpl(Log log){
30 | this.log = log;
31 | }
32 |
33 | public JakartaCommonsLoggingImpl(String loggerName){
34 | log = LogFactory.getLog(loggerName);
35 | }
36 |
37 | public boolean isDebugEnabled() {
38 | return log.isDebugEnabled();
39 | }
40 |
41 | public void error(String s, Throwable e) {
42 | log.error(s, e);
43 | }
44 |
45 | public void error(String s) {
46 | log.error(s);
47 | }
48 |
49 | public void debug(String s) {
50 | log.debug(s);
51 | }
52 |
53 | public void debug(String s, Throwable e) {
54 | log.debug(s, e);
55 | }
56 |
57 | public void warn(String s) {
58 | log.warn(s);
59 | }
60 |
61 | @Override
62 | public void warn(String s, Throwable e) {
63 | log.warn(s, e);
64 | }
65 |
66 |
67 | @Override
68 | public boolean isInfoEnabled() {
69 | return log.isInfoEnabled();
70 | }
71 |
72 | @Override
73 | public void info(String msg) {
74 | log.info(msg);
75 | }
76 |
77 |
78 | @Override
79 | public boolean isWarnEnabled() {
80 | return log.isWarnEnabled();
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/Jdk14LoggingImpl.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.nlpcn.commons.lang.util.logging;
17 |
18 | import java.util.logging.Level;
19 | import java.util.logging.Logger;
20 |
21 | public class Jdk14LoggingImpl implements Log {
22 |
23 | private Logger log;
24 |
25 | private String loggerName;
26 |
27 | public Jdk14LoggingImpl(String loggerName){
28 | this.loggerName = loggerName;
29 | log = Logger.getLogger(loggerName);
30 | }
31 |
32 | public boolean isDebugEnabled() {
33 | return log.isLoggable(Level.FINE);
34 | }
35 |
36 | public void error(String s, Throwable e) {
37 | log.logp(Level.SEVERE, loggerName, Thread.currentThread().getStackTrace()[1].getMethodName(), s, e);
38 | }
39 |
40 | public void error(String s) {
41 | log.logp(Level.SEVERE, loggerName, Thread.currentThread().getStackTrace()[1].getMethodName(), s);
42 | }
43 |
44 | public void debug(String s) {
45 | log.logp(Level.FINE, loggerName, Thread.currentThread().getStackTrace()[1].getMethodName(), s);
46 | }
47 |
48 | public void debug(String s, Throwable e) {
49 | log.logp(Level.FINE, loggerName, Thread.currentThread().getStackTrace()[1].getMethodName(), s, e);
50 | }
51 |
52 | public void warn(String s) {
53 | log.logp(Level.WARNING, loggerName, Thread.currentThread().getStackTrace()[1].getMethodName(), s);
54 | }
55 |
56 | @Override
57 | public void warn(String s, Throwable e) {
58 | log.logp(Level.WARNING, loggerName, Thread.currentThread().getStackTrace()[1].getMethodName(), s, e);
59 | }
60 |
61 | @Override
62 | public boolean isInfoEnabled() {
63 | return log.isLoggable(Level.INFO);
64 | }
65 |
66 | @Override
67 | public void info(String msg) {
68 | log.logp(Level.INFO, loggerName, Thread.currentThread().getStackTrace()[1].getMethodName(), msg);
69 | }
70 |
71 | @Override
72 | public boolean isWarnEnabled() {
73 | return log.isLoggable(Level.WARNING);
74 | }
75 |
76 | }
77 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/Log.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.nlpcn.commons.lang.util.logging;
17 |
/**
 * Minimal logging facade implemented by the adapters in this package.
 */
public interface Log {

    // ---- level checks ----

    /** @return whether debug output is enabled */
    boolean isDebugEnabled();

    /** @return whether info output is enabled */
    boolean isInfoEnabled();

    /** @return whether warn output is enabled */
    boolean isWarnEnabled();

    // ---- debug ----

    /** Logs a message at debug level. */
    void debug(String msg);

    /** Logs a message and a throwable at debug level. */
    void debug(String msg, Throwable e);

    // ---- info ----

    /** Logs a message at info level. */
    void info(String msg);

    // ---- warn ----

    /** Logs a message at warn level. */
    void warn(String msg);

    /** Logs a message and a throwable at warn level. */
    void warn(String msg, Throwable e);

    // ---- error ----

    /** Logs a message at error level. */
    void error(String msg);

    /** Logs a message and a throwable at error level. */
    void error(String msg, Throwable e);

}
41 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/Log4j2Impl.java:
--------------------------------------------------------------------------------
1 | package org.nlpcn.commons.lang.util.logging;
2 |
3 | import org.apache.logging.log4j.Level;
4 | import org.apache.logging.log4j.LogManager;
5 | import org.apache.logging.log4j.Logger;
6 |
7 | /*
8 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing, software
17 | * distributed under the License is distributed on an "AS IS" BASIS,
18 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | * See the License for the specific language governing permissions and
20 | * limitations under the License.
21 | */
22 | public class Log4j2Impl implements Log {
23 |
24 | private Logger log;
25 |
26 | private int errorCount;
27 | private int warnCount;
28 | private int infoCount;
29 | private int debugCount;
30 |
31 | /**
32 | * @since 0.2.21
33 | * @param log
34 | */
35 | public Log4j2Impl(Logger log){
36 | this.log = log;
37 | }
38 |
39 | public Log4j2Impl(String loggerName){
40 | log = LogManager.getLogger(loggerName);
41 | }
42 |
43 | public Logger getLog() {
44 | return log;
45 | }
46 |
47 | public boolean isDebugEnabled() {
48 | return log.isDebugEnabled();
49 | }
50 |
51 | public void error(String s, Throwable e) {
52 | errorCount++;
53 | log.error(s, e);
54 | }
55 |
56 | public void error(String s) {
57 | errorCount++;
58 | log.error(s);
59 | }
60 |
61 | public void debug(String s) {
62 | debugCount++;
63 | log.debug(s);
64 | }
65 |
66 | public void debug(String s, Throwable e) {
67 | debugCount++;
68 | log.debug(s, e);
69 | }
70 |
71 | public void warn(String s) {
72 | log.warn(s);
73 | warnCount++;
74 | }
75 |
76 | public void warn(String s, Throwable e) {
77 | log.warn(s, e);
78 | warnCount++;
79 | }
80 |
81 | public int getWarnCount() {
82 | return warnCount;
83 | }
84 |
85 | public int getErrorCount() {
86 | return errorCount;
87 | }
88 |
89 | public void resetStat() {
90 | errorCount = 0;
91 | warnCount = 0;
92 | infoCount = 0;
93 | debugCount = 0;
94 | }
95 |
96 | public int getDebugCount() {
97 | return debugCount;
98 | }
99 |
100 | public boolean isInfoEnabled() {
101 | return log.isInfoEnabled();
102 | }
103 |
104 | public void info(String msg) {
105 | infoCount++;
106 | log.info(msg);
107 | }
108 |
109 | public boolean isWarnEnabled() {
110 | return log.isEnabled(Level.WARN);
111 | }
112 |
113 | public int getInfoCount() {
114 | return infoCount;
115 | }
116 |
117 | public String toString() {
118 | return log.toString();
119 | }
120 |
121 | }
122 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/Log4jImpl.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.nlpcn.commons.lang.util.logging;
17 |
18 | import org.apache.log4j.Level;
19 | import org.apache.log4j.Logger;
20 |
21 | public class Log4jImpl implements Log {
22 |
23 | private static final String callerFQCN = Log4jImpl.class.getName();
24 |
25 | private Logger log;
26 |
27 | private int errorCount;
28 | private int warnCount;
29 | private int infoCount;
30 | private int debugCount;
31 |
32 | /**
33 | * @since 0.2.21
34 | * @param log
35 | */
36 | public Log4jImpl(Logger log){
37 | this.log = log;
38 | }
39 |
40 | public Log4jImpl(String loggerName){
41 | log = Logger.getLogger(loggerName);
42 | }
43 |
44 | public Logger getLog() {
45 | return log;
46 | }
47 |
48 | public boolean isDebugEnabled() {
49 | return log.isDebugEnabled();
50 | }
51 |
52 | public void error(String s, Throwable e) {
53 | errorCount++;
54 | log.log(callerFQCN, Level.ERROR, s, e);
55 | }
56 |
57 | public void error(String s) {
58 | errorCount++;
59 | log.log(callerFQCN, Level.ERROR, s, null);
60 | }
61 |
62 | public void debug(String s) {
63 | debugCount++;
64 | log.log(callerFQCN, Level.DEBUG, s, null);
65 | }
66 |
67 | public void debug(String s, Throwable e) {
68 | debugCount++;
69 | log.log(callerFQCN, Level.DEBUG, s, e);
70 | }
71 |
72 | public void warn(String s) {
73 | log.log(callerFQCN, Level.WARN, s, null);
74 | warnCount++;
75 | }
76 |
77 | public void warn(String s, Throwable e) {
78 | log.log(callerFQCN, Level.WARN, s, e);
79 | warnCount++;
80 | }
81 |
82 | public int getWarnCount() {
83 | return warnCount;
84 | }
85 |
86 | public int getErrorCount() {
87 | return errorCount;
88 | }
89 |
90 | public void resetStat() {
91 | errorCount = 0;
92 | warnCount = 0;
93 | infoCount = 0;
94 | debugCount = 0;
95 | }
96 |
97 | public int getDebugCount() {
98 | return debugCount;
99 | }
100 |
101 | public boolean isInfoEnabled() {
102 | return log.isInfoEnabled();
103 | }
104 |
105 | public void info(String msg) {
106 | infoCount++;
107 | log.log(callerFQCN, Level.INFO, msg, null);
108 | }
109 |
110 | public boolean isWarnEnabled() {
111 | return log.isEnabledFor(Level.WARN);
112 | }
113 |
114 | public int getInfoCount() {
115 | return infoCount;
116 | }
117 |
118 | public String toString() {
119 | return log.toString();
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/LogFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.nlpcn.commons.lang.util.logging;
17 |
18 | import java.lang.reflect.Constructor;
19 |
20 | @SuppressWarnings("rawtypes")
21 | public class LogFactory {
22 |
23 | private static Constructor logConstructor;
24 |
25 | static {
26 | String logType = System.getProperty("druid.logType");
27 | if (logType != null) {
28 | if (logType.equalsIgnoreCase("slf4j")) {
29 | tryImplementation("org.slf4j.Logger", "org.nlpcn.commons.lang.util.logging.SLF4JImpl");
30 | } else if (logType.equalsIgnoreCase("log4j")) {
31 | tryImplementation("org.apache.log4j.Logger", "org.nlpcn.commons.lang.util.logging.Log4jImpl");
32 | } else if (logType.equalsIgnoreCase("log4j2")) {
33 | tryImplementation("org.apache.logging.log4j.Logger", "org.nlpcn.commons.lang.util.logging.Log4j2Impl");
34 | } else if (logType.equalsIgnoreCase("commonsLog")) {
35 | tryImplementation("org.apache.commons.logging.LogFactory", "org.nlpcn.commons.lang.util.logging.JakartaCommonsLoggingImpl");
36 | } else if (logType.equalsIgnoreCase("jdkLog")) {
37 | tryImplementation("java.util.logging.Logger", "org.nlpcn.commons.lang.util.logging.Jdk14LoggingImpl");
38 | }
39 | }
40 | // 优先选择log4j,而非Apache Common Logging. 因为后者无法设置真实Log调用者的信息
41 | tryImplementation("org.apache.log4j.Logger", "org.nlpcn.commons.lang.util.logging.Log4jImpl");
42 | tryImplementation("org.apache.logging.log4j.Logger", "org.nlpcn.commons.lang.util.logging.Log4j2Impl");
43 | tryImplementation("org.slf4j.Logger", "org.nlpcn.commons.lang.util.logging.SLF4JImpl");
44 | tryImplementation("org.apache.commons.logging.LogFactory", "org.nlpcn.commons.lang.util.logging.JakartaCommonsLoggingImpl");
45 | tryImplementation("java.util.logging.Logger", "org.nlpcn.commons.lang.util.logging.Jdk14LoggingImpl");
46 |
47 | if (logConstructor == null) {
48 | try {
49 | logConstructor = NoLoggingImpl.class.getConstructor(String.class);
50 | } catch (Exception e) {
51 | throw new IllegalStateException(e.getMessage(), e);
52 | }
53 | }
54 | }
55 |
56 | @SuppressWarnings("unchecked")
57 | private static void tryImplementation(String testClassName, String implClassName) {
58 | if (logConstructor != null) {
59 | return;
60 | }
61 |
62 | try {
63 | Resources.classForName(testClassName);
64 | Class implClass = Resources.classForName(implClassName);
65 | logConstructor = implClass.getConstructor(new Class[] { String.class });
66 |
67 | Class> declareClass = logConstructor.getDeclaringClass();
68 | if (!Log.class.isAssignableFrom(declareClass)) {
69 | logConstructor = null;
70 | }
71 |
72 | try {
73 | if (null != logConstructor) {
74 | logConstructor.newInstance(LogFactory.class.getName());
75 | }
76 | } catch (Throwable t) {
77 | logConstructor = null;
78 | }
79 |
80 | } catch (Throwable t) {
81 | // skip
82 | }
83 | }
84 |
85 | public static Log getLog(Class clazz) {
86 | return getLog(clazz.getName());
87 | }
88 |
89 | public static Log getLog(String loggerName) {
90 | try {
91 | return (Log) logConstructor.newInstance(loggerName);
92 | } catch (Throwable t) {
93 | throw new RuntimeException("Error creating logger for logger '" + loggerName + "'. Cause: " + t, t);
94 | }
95 | }
96 |
97 | /**
98 | * 获取log默认当前类,不支持android
99 | * @return
100 | */
101 | public static Log getLog() {
102 | StackTraceElement[] sts = Thread.currentThread().getStackTrace();
103 | return getLog(sts[2].getClassName());
104 | }
105 |
106 | @SuppressWarnings("unchecked")
107 | public static synchronized void selectLog4JLogging() {
108 | try {
109 | Resources.classForName("org.apache.log4j.Logger");
110 | Class implClass = Resources.classForName("org.nlpcn.commons.lang.util.logging.Log4jImpl");
111 | logConstructor = implClass.getConstructor(new Class[] { String.class });
112 | } catch (Throwable t) {
113 | //ignore
114 | }
115 | }
116 |
117 | @SuppressWarnings("unchecked")
118 | public static synchronized void selectJavaLogging() {
119 | try {
120 | Resources.classForName("java.util.logging.Logger");
121 | Class implClass = Resources.classForName("org.nlpcn.commons.lang.util.logging.Jdk14LoggingImpl");
122 | logConstructor = implClass.getConstructor(new Class[] { String.class });
123 | } catch (Throwable t) {
124 | //ignore
125 | }
126 | }
127 |
128 | }
129 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/NoLoggingImpl.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.nlpcn.commons.lang.util.logging;
17 |
18 | public class NoLoggingImpl implements Log {
19 |
20 | private String loggerName;
21 |
22 | public NoLoggingImpl(String loggerName){
23 | this.loggerName = loggerName;
24 | }
25 |
26 | public String getLoggerName() {
27 | return this.loggerName;
28 | }
29 |
30 | public boolean isDebugEnabled() {
31 | return false;
32 | }
33 |
34 | public void error(String s, Throwable e) {
35 | error(s);
36 |
37 | if (e != null) {
38 | e.printStackTrace();
39 | }
40 | }
41 |
42 | public void error(String s) {
43 | if (s != null) {
44 | System.err.println(loggerName + " : " + s);
45 | }
46 | }
47 |
48 | public void debug(String s) {
49 | System.out.println(s);
50 | }
51 |
52 | public void debug(String s, Throwable e) {
53 | System.out.println(s+e!=null?e.getMessage():"");
54 | }
55 |
56 | public void warn(String s) {
57 | System.out.println(s);
58 | }
59 |
60 | @Override
61 | public void warn(String s, Throwable e) {
62 | System.out.println(s+","+e!=null?e.getMessage():"");
63 | }
64 |
65 |
66 | @Override
67 | public boolean isInfoEnabled() {
68 | return false;
69 | }
70 |
71 | @Override
72 | public void info(String s) {
73 | System.out.println(s);
74 | }
75 |
76 | @Override
77 | public boolean isWarnEnabled() {
78 | return false;
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/Resources.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.nlpcn.commons.lang.util.logging;
17 |
18 |
19 | /**
20 | * A class to simplify access to resources through the classloader.
21 | */
public final class Resources {

    /** Class loader consulted first by {@link #classForName(String)}; may be {@code null}. */
    private static ClassLoader defaultClassLoader;

    /** Static utility holder; not instantiable. */
    private Resources() {
    }

    /**
     * Returns the default classloader (may be null).
     *
     * @return The default classloader
     */
    public static ClassLoader getDefaultClassLoader() {
        return defaultClassLoader;
    }

    /**
     * Sets the default classloader.
     *
     * @param defaultClassLoader - the new default ClassLoader
     */
    public static void setDefaultClassLoader(ClassLoader defaultClassLoader) {
        Resources.defaultClassLoader = defaultClassLoader;
    }

    /**
     * Loads a class, trying the configured class loader first and falling back
     * to {@link Class#forName(String)}.
     *
     * @param className - the class to load
     * @return The loaded class
     * @throws ClassNotFoundException If the class cannot be found by either lookup
     */
    public static Class<?> classForName(String className) throws ClassNotFoundException {
        Class<?> clazz = null;
        ClassLoader loader = getClassLoader();
        // The thread context class loader may legitimately be null; skip straight
        // to the fallback in that case.
        if (loader != null) {
            try {
                clazz = loader.loadClass(className);
            } catch (Exception ignored) {
                // Deliberately ignored: Class.forName below is the failsafe and
                // reports the failure if the class really cannot be found.
            }
        }
        if (clazz == null) {
            clazz = Class.forName(className);
        }
        return clazz;
    }

    /**
     * Picks the loader to try first: the explicitly configured default if set,
     * otherwise the current thread's context class loader.
     *
     * @return the preferred class loader, possibly {@code null}
     */
    private static ClassLoader getClassLoader() {
        if (defaultClassLoader != null) {
            return defaultClassLoader;
        }
        return Thread.currentThread().getContextClassLoader();
    }

}
76 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/logging/SLF4JImpl.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1999-2101 Alibaba Group Holding Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.nlpcn.commons.lang.util.logging;
17 |
18 | import org.slf4j.Logger;
19 | import org.slf4j.LoggerFactory;
20 | import org.slf4j.spi.LocationAwareLogger;
21 |
22 | public class SLF4JImpl implements Log {
23 |
24 | private static final String callerFQCN = SLF4JImpl.class.getName();
25 | private static final Logger testLogger = LoggerFactory.getLogger(SLF4JImpl.class);
26 | static {
27 | // if the logger is not a LocationAwareLogger instance, it can not get correct stack StackTraceElement
28 | // so ignore this implementation.
29 | if (!(testLogger instanceof LocationAwareLogger)) {
30 | throw new UnsupportedOperationException(testLogger.getClass() + " is not a suitable logger");
31 | }
32 | }
33 | private LocationAwareLogger log;
34 |
35 | public SLF4JImpl(LocationAwareLogger log){
36 | this.log = log;
37 | }
38 |
39 | public SLF4JImpl(String loggerName){
40 | this.log = (LocationAwareLogger) LoggerFactory.getLogger(loggerName);
41 | }
42 |
43 | @Override
44 | public boolean isDebugEnabled() {
45 | return log.isDebugEnabled();
46 | }
47 |
48 | @Override
49 | public void error(String msg, Throwable e) {
50 | log.log(null, callerFQCN, LocationAwareLogger.ERROR_INT, msg, null, e);
51 | }
52 |
53 | @Override
54 | public void error(String msg) {
55 | log.log(null, callerFQCN, LocationAwareLogger.ERROR_INT, msg, null, null);
56 | }
57 |
58 | @Override
59 | public boolean isInfoEnabled() {
60 | return log.isInfoEnabled();
61 | }
62 |
63 | @Override
64 | public void info(String msg) {
65 | log.log(null, callerFQCN, LocationAwareLogger.INFO_INT, msg, null, null);
66 | }
67 |
68 | @Override
69 | public void debug(String msg) {
70 | log.log(null, callerFQCN, LocationAwareLogger.DEBUG_INT, msg, null, null);
71 | }
72 |
73 | @Override
74 | public void debug(String msg, Throwable e) {
75 | log.log(null, callerFQCN, LocationAwareLogger.ERROR_INT, msg, null, e);
76 | }
77 |
78 | @Override
79 | public boolean isWarnEnabled() {
80 | return log.isWarnEnabled();
81 | }
82 |
83 | @Override
84 | public void warn(String msg) {
85 | log.log(null, callerFQCN, LocationAwareLogger.WARN_INT, msg, null, null);
86 | }
87 |
88 | @Override
89 | public void warn(String msg, Throwable e) {
90 | log.log(null, callerFQCN, LocationAwareLogger.WARN_INT, msg, null, e);
91 | }
92 |
93 | }
94 |
--------------------------------------------------------------------------------
/pinyin-core/src/main/java/org/nlpcn/commons/lang/util/tuples/KeyValue.java:
--------------------------------------------------------------------------------
1 | /*
2 | * =============================================================================
3 | *
4 | * Copyright (c) 2010, The JAVATUPLES team (http://www.javatuples.org)
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | * =============================================================================
19 | */
20 | package org.nlpcn.commons.lang.util.tuples;
21 |
22 | import java.util.Collection;
23 | import java.util.Iterator;
24 |
25 | import org.nlpcn.commons.lang.util.tuples.valueintf.IValueKey;
26 | import org.nlpcn.commons.lang.util.tuples.valueintf.IValueValue;
27 |
28 | /**
29 | *
30 | * A tuple of two elements, with positions 0 and 1 renamed as "key" and
31 | * "value", respectively.
32 | *