├── LICENSE.txt
├── README.md
├── elasticsearch-analysis-annotation-0.9.zip
├── pom.xml
└── src
└── main
├── assemblies
└── plugin.xml
├── java
└── org
│ └── elasticsearch
│ ├── index
│ └── analysis
│ │ └── annotation
│ │ ├── AnnotationAnalysisBinderProcessor.java
│ │ ├── AnnotationAnalyzerProvider.java
│ │ └── InlineAnnotationFilterFactory.java
│ ├── indices
│ └── analysis
│ │ └── annotation
│ │ ├── AnnotationIndicesAnalysis.java
│ │ └── AnnotationIndicesAnalysisModule.java
│ └── plugin
│ └── analysis
│ └── annotation
│ ├── AnnotationAnalysisPlugin.java
│ ├── AnnotationAnalyzer.java
│ └── InlineAnnotationFilter.java
└── resources
└── es-plugin.properties
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | elasticsearch-analysis-annotation
2 | =================================
3 |
4 | Analysis plugin for ElasticSearch providing capability for processing inline
5 | annotations in documents.
6 |
7 | Description
8 | -----------
9 |
10 | Inline annotations are simple pieces of semantic information embedded in the
11 | source text. They are removed from the indexed text and injected as synonyms at
12 | the positions of the words they relate to.
13 |
14 | This plugin provides analyzer `AnnotationAnalyzer` as well as filter
15 | `InlineAnnotationFilter`.
16 | `AnnotationAnalyzer` is composed of `WhitespaceTokenizer`, `LowerCaseFilter` and
17 | `InlineAnnotationFilter` (with default settings).
18 | More sophisticated analyzers (equivalent to StandardAnalyzer or SnowballAnalyzer)
19 | can be configured via the configuration file `elasticsearch.yml` or the web API.
20 |
21 |
22 | Example
23 | -------
24 | Let's say we have this document:
25 | ```
26 | "Mozart[artist] was born[lifeEvent] in Salzburg[city;Austria]"
27 | ```
28 |
29 | If we parse this with a StandardAnalyzer equivalent that has annotation analysis added to it,
30 | we get these tokens (some are omitted because of the StopFilter):
31 | ```
32 | | [austria]
33 | [artist] | | [lifeevent] | | [city]
34 | mozart | | born | | salzburg
35 | ```
36 |
37 | If we used the plain StandardAnalyzer instead, the result would be:
38 | ```
39 | mozart | artist | | born | lifeevent | | salzburg | city | austria
40 | ```
41 |
42 |
43 | Installation
44 | ------------
45 | This plugin follows conventions for elasticsearch plugins, thus can be installed
46 | in a standard manner - see http://www.elasticsearch.org/guide/reference/modules/plugins/
47 |
48 |
49 | Using this plugin
50 | -----------------
51 | To use those custom analyzers/filters you need to either modify `elasticsearch.yml`
52 | configuration file - see http://www.elasticsearch.org/guide/reference/index-modules/analysis/ or specify
53 | index mapping via elasticsearch API.
54 |
55 | The following example configuration contains definitions for analyzers based on behaviour of
56 | StandardAnalyzer and SnowballAnalyzer.
57 |
58 | *Please note that standard_annotation and snowball_annotation analyzers use standard tokenizer,
59 | which removes all non-alphanumeric characters and thus makes it impossible to process inline
60 | annotations marked with [,],; (which are used in default behaviour of InlineAnnotationFilter).*
61 |
62 | For this purpose we need to use mapping char filter, which remaps those special characters to
63 | their equivalent, which will be accepted by standard tokenizer as part of the token.
64 |
65 | ```
66 | index :
67 | analysis :
68 | char_filter :
69 | annotation_remap :
70 | type : mapping
71 | mappings : ["[=>__annotation_start__", "]=>__annotation_end__",";=>__annotation_delimiter__"]
72 | analyzer :
73 | standard_annotation :
74 | type : custom
75 | tokenizer : standard
76 | char_filter : annotation_remap
77 | filter : [standard, lowercase, annotation_filter, stop]
78 | snowball_annotation :
79 | type : custom
80 | tokenizer : standard
81 | char_filter : annotation_remap
82 | filter : [standard, lowercase, annotation_filter, stop, snowball]
83 | filter :
84 | annotation_filter :
85 | type : annotation_filter
86 | start : __annotation_start__
87 | end : __annotation_end__
88 | delimiter : __annotation_delimiter__
89 | ```
90 |
91 | To test the analyzer you can query the following
92 | http://localhost:9200/test/_analyze?analyzer=annotation&text="Mozart[city;Salzburg]"
93 |
94 | Limitation
95 | ----------
96 | Keep in mind that you can't use word-delimiting characters inside annotations.
97 | The annotated string would be split into separate tokens, which would result in unexpected behaviour.
98 |
99 | Customization
100 | -------------
101 | The InlineAnnotationFilter can be slightly customized.
102 |
103 | List of supported options
104 | + `start` - start delimiter for inline annotation
105 | + `end` - end delimiter for inline annotation
106 | + `prefix` - string to be prepended to the synonym that is created from an inline annotation
107 | + `suffix` - string to be appended to the synonym that is created from an inline annotation
108 | + `token-type` - token type of synonym
109 | + `delimiter` - delimiter for multiple inline annotations
110 |
111 | Example providing default values
112 | ```
113 | index :
114 | analysis :
115 | analyzer :
116 | annotation :
117 | type : annotation
118 | start : [
119 | end : ]
120 | prefix : [
121 | suffix : ]
122 | token-type: synonym
123 | delimiter : ;
124 | ```
125 |
126 |
127 | Elasticsearch version
128 | ---------------------
129 | This plugin was successfully tested on elasticsearch version 0.90.2
--------------------------------------------------------------------------------
/elasticsearch-analysis-annotation-0.9.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/samekmichal/elasticsearch-analysis-annotation/99bd3dfdb1c9bb722b7e0dfb1645f694da8f974f/elasticsearch-analysis-annotation-0.9.zip
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | elasticsearch-analysis-annotation
6 | 4.0.0
7 | org.elasticsearch
8 | elasticsearch-analysis-annotation
9 | 1.0
10 | jar
11 | 2013
12 |
13 |
14 | The Apache Software License, Version 2.0
15 | http://www.apache.org/licenses/LICENSE-2.0.txt
16 | repo
17 |
18 |
19 |
20 |
21 | org.sonatype.oss
22 | oss-parent
23 | 7
24 |
25 |
26 |
27 | 0.90.1
28 | 4.3.0
29 |
30 |
31 |
32 |
33 | sonatype
34 | http://oss.sonatype.org/content/repositories/releases/
35 |
36 |
37 |
38 |
39 |
40 | org.elasticsearch
41 | elasticsearch
42 | ${elasticsearch.version}
43 | compile
44 |
45 |
46 |
47 | org.apache.lucene
48 | lucene-core
49 | ${lucene.version}
50 |
51 |
52 |
53 |
54 | org.testng
55 | testng
56 | 6.3.1
57 | test
58 |
59 |
60 |
61 |
62 | org.apache.lucene
63 | lucene-analyzers-common
64 | ${lucene.version}
65 |
66 |
67 | com.google.guava
68 | guava
69 | 14.0.1
70 |
71 |
72 |
73 |
74 |
75 |
76 | org.apache.maven.plugins
77 | maven-compiler-plugin
78 | 2.3.2
79 |
80 | 1.6
81 | 1.6
82 |
83 |
84 |
85 | org.apache.maven.plugins
86 | maven-surefire-plugin
87 | 2.11
88 |
89 |
90 | **/*Tests.java
91 |
92 |
93 |
94 |
95 | org.apache.maven.plugins
96 | maven-source-plugin
97 | 2.1.2
98 |
99 |
100 | attach-sources
101 |
102 | jar
103 |
104 |
105 |
106 |
107 |
108 | maven-assembly-plugin
109 | 2.3
110 |
111 | false
112 | ${project.build.directory}/releases/
113 |
114 | ${basedir}/src/main/assemblies/plugin.xml
115 |
116 |
117 |
118 |
119 | package
120 |
121 | single
122 |
123 |
124 |
125 |
126 |
127 |
128 | Analysis plugin for ElasticSearch providing capability of handling inline annotations such as:
129 | "Mozart[artist]" or "Salzburg[city;Austria]"
130 |
131 |
--------------------------------------------------------------------------------
/src/main/assemblies/plugin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | plugin
4 |
5 | zip
6 |
7 | false
8 |
9 |
10 | /
11 | true
12 | true
13 |
14 | org.elasticsearch:elasticsearch
15 |
16 |
17 |
18 |
19 |
20 | ${project.build.directory}/
21 | /
22 |
23 | elasticsearch-${project.name}-${elasticsearch.version}.jar
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/annotation/AnnotationAnalysisBinderProcessor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to ElasticSearch and Shay Banon under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. ElasticSearch licenses this
6 | * file to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.elasticsearch.index.analysis.annotation;
21 |
22 | import org.elasticsearch.index.analysis.AnalysisModule;
23 |
24 | /**
25 | */
26 | public class AnnotationAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {
27 |
28 | @Override
29 | public void processAnalyzers(AnalyzersBindings analyzersBindings) {
30 | analyzersBindings.processAnalyzer("annotation", AnnotationAnalyzerProvider.class);
31 | }
32 | @Override
33 | public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
34 | tokenFiltersBindings.processTokenFilter("annotation_filter", InlineAnnotationFilterFactory.class);
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/annotation/AnnotationAnalyzerProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to ElasticSearch and Shay Banon under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. ElasticSearch licenses this
6 | * file to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.elasticsearch.index.analysis.annotation;
21 |
22 | import org.elasticsearch.common.inject.Inject;
23 | import org.elasticsearch.common.inject.assistedinject.Assisted;
24 | import org.elasticsearch.common.settings.Settings;
25 | import org.elasticsearch.env.Environment;
26 | import org.elasticsearch.index.Index;
27 | import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
28 | import org.elasticsearch.index.settings.IndexSettings;
29 | import org.elasticsearch.plugin.analysis.annotation.AnnotationAnalyzer;
30 | import org.elasticsearch.plugin.analysis.annotation.InlineAnnotationFilter;
31 |
32 | /**
33 | */
34 | public class AnnotationAnalyzerProvider extends AbstractIndexAnalyzerProvider {
35 |
36 | private final AnnotationAnalyzer analyzer;
37 | private final String name;
38 |
39 | @Inject
40 | public AnnotationAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
41 | super(index, indexSettings, name, settings);
42 |
43 | this.name = name;
44 |
45 | InlineAnnotationFilter.settings(settings, name);
46 |
47 | analyzer = new AnnotationAnalyzer(version);
48 | }
49 |
50 | @Override
51 | public AnnotationAnalyzer get() {
52 | return this.analyzer;
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/index/analysis/annotation/InlineAnnotationFilterFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to ElasticSearch and Shay Banon under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. ElasticSearch licenses this
6 | * file to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.elasticsearch.index.analysis.annotation;
21 |
22 | import org.apache.lucene.analysis.TokenStream;
23 | import org.elasticsearch.common.inject.Inject;
24 | import org.elasticsearch.common.inject.assistedinject.Assisted;
25 | import org.elasticsearch.common.settings.Settings;
26 | import org.elasticsearch.index.Index;
27 | import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
28 | import org.elasticsearch.index.settings.IndexSettings;
29 | import org.elasticsearch.plugin.analysis.annotation.InlineAnnotationFilter;
30 |
31 |
32 |
33 | public class InlineAnnotationFilterFactory extends AbstractTokenFilterFactory {
34 |
35 | @Inject public InlineAnnotationFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
36 | super(index, indexSettings, name, settings);
37 |
38 | InlineAnnotationFilter.settings(settings, name);
39 | }
40 |
41 | @Override public TokenStream create(TokenStream tokenStream) {
42 | return new InlineAnnotationFilter(tokenStream);
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/indices/analysis/annotation/AnnotationIndicesAnalysis.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to ElasticSearch and Shay Banon under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. ElasticSearch licenses this
6 | * file to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.elasticsearch.indices.analysis.annotation;
21 |
22 | import org.apache.lucene.analysis.TokenStream;
23 | import org.elasticsearch.common.component.AbstractComponent;
24 | import org.elasticsearch.common.inject.Inject;
25 | import org.elasticsearch.common.lucene.Lucene;
26 | import org.elasticsearch.common.settings.Settings;
27 | import org.elasticsearch.index.analysis.AnalyzerScope;
28 | import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
29 | import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory;
30 | import org.elasticsearch.index.analysis.TokenFilterFactory;
31 | import org.elasticsearch.indices.analysis.IndicesAnalysisService;
32 | import org.elasticsearch.plugin.analysis.annotation.AnnotationAnalyzer;
33 | import org.elasticsearch.plugin.analysis.annotation.InlineAnnotationFilter;
34 |
35 | /**
36 | * Registers indices level analysis components so, if not explicitly configured,
37 | * will be shared among all indices.
38 | */
39 | public class AnnotationIndicesAnalysis extends AbstractComponent {
40 |
41 | @Inject
42 | public AnnotationIndicesAnalysis(Settings settings,
43 | IndicesAnalysisService indicesAnalysisService) {
44 | super(settings);
45 | indicesAnalysisService.analyzerProviderFactories().put(
46 | "default",
47 | new PreBuiltAnalyzerProviderFactory("default",
48 | AnalyzerScope.INDICES, new AnnotationAnalyzer(
49 | Lucene.ANALYZER_VERSION)));
50 |
51 | indicesAnalysisService.tokenFilterFactories().put("annotation_filter",
52 | new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
53 | @Override
54 | public String name() {
55 | return "annotation_filter";
56 | }
57 |
58 | @Override
59 | public TokenStream create(TokenStream tokenStream) {
60 | return new InlineAnnotationFilter(tokenStream);
61 | }
62 | }));
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/indices/analysis/annotation/AnnotationIndicesAnalysisModule.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to ElasticSearch and Shay Banon under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. ElasticSearch licenses this
6 | * file to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.elasticsearch.indices.analysis.annotation;
21 |
22 | import org.elasticsearch.common.inject.AbstractModule;
23 |
24 | /**
25 | */
26 | public class AnnotationIndicesAnalysisModule extends AbstractModule {
27 |
28 | @Override
29 | protected void configure() {
30 | bind(AnnotationIndicesAnalysis.class).asEagerSingleton();
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/plugin/analysis/annotation/AnnotationAnalysisPlugin.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to ElasticSearch and Shay Banon under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. ElasticSearch licenses this
6 | * file to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.elasticsearch.plugin.analysis.annotation;
21 |
22 | //import com.google.common.collect.ImmutableList;
23 | import org.elasticsearch.common.collect.ImmutableList;
24 | import org.elasticsearch.common.inject.Module;
25 | import org.elasticsearch.index.analysis.AnalysisModule;
26 | import org.elasticsearch.index.analysis.annotation.AnnotationAnalysisBinderProcessor;
27 | import org.elasticsearch.indices.analysis.annotation.AnnotationIndicesAnalysisModule;
28 | import org.elasticsearch.plugins.AbstractPlugin;
29 |
30 | import java.util.Collection;
31 |
32 | /**
33 | *
34 | */
35 | public class AnnotationAnalysisPlugin extends AbstractPlugin {
36 |
37 | @Override
38 | public String name() {
39 | return "analysis-annotation";
40 | }
41 |
42 | @Override
43 | public String description() {
44 | return "Inline annotations analysis support";
45 | }
46 |
47 | @Override
48 | public Collection> modules() {
49 | return ImmutableList.>of(AnnotationIndicesAnalysisModule.class);
50 | }
51 |
52 | public void onModule(AnalysisModule module) {
53 | module.addProcessor(new AnnotationAnalysisBinderProcessor());
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/plugin/analysis/annotation/AnnotationAnalyzer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to ElasticSearch and Shay Banon under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. ElasticSearch licenses this
6 | * file to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.elasticsearch.plugin.analysis.annotation;
21 |
22 | import java.io.Reader;
23 |
24 | import org.apache.lucene.analysis.Analyzer;
25 | import org.apache.lucene.analysis.TokenStream;
26 | import org.apache.lucene.analysis.Tokenizer;
27 | import org.apache.lucene.analysis.core.LowerCaseFilter;
28 | import org.apache.lucene.analysis.core.WhitespaceTokenizer;
29 | import org.apache.lucene.util.Version;
30 |
31 | /**
32 | * Analyzer for inline annotations composed of WhitespaceTokenizer, LowerCaseFilter,
33 | * and InlineAnnotationFilter (default settings).
34 | *
35 | * @author Michal Samek, samek.michal @ gmail.com
36 | *
37 | */
38 | public class AnnotationAnalyzer extends Analyzer {
39 |
40 | private final Version version;
41 |
42 | public AnnotationAnalyzer(Version version) {
43 | this.version = version;
44 | }
45 |
46 |
47 | @Override
48 | protected TokenStreamComponents createComponents(String fieldName,
49 | Reader reader) {
50 | Tokenizer source = new WhitespaceTokenizer(version, reader);
51 | TokenStream filter = new LowerCaseFilter(version, source);
52 | filter = new InlineAnnotationFilter(filter);
53 |
54 | return new TokenStreamComponents(source, filter);
55 | }
56 |
57 | }
58 |
59 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/plugin/analysis/annotation/InlineAnnotationFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to ElasticSearch and Shay Banon under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. ElasticSearch licenses this
6 | * file to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.elasticsearch.plugin.analysis.annotation;
21 |
22 | import java.io.IOException;
23 | import java.util.Stack;
24 |
25 | import org.apache.lucene.analysis.TokenFilter;
26 | import org.apache.lucene.analysis.TokenStream;
27 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
28 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
29 | import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
30 | import org.apache.lucene.util.AttributeSource;
31 | import org.elasticsearch.ElasticSearchIllegalArgumentException;
32 | import org.elasticsearch.common.settings.Settings;
33 |
34 | /**
35 | * Characters surrounded by '[' SYNONYM_START_DELIMITER and ']' are considered
36 | * to be a synonym to the word located before the opening '['.
37 | * eg
38 | * "W. A. Mozart[artist]"
39 | * "Salzburg[city;Austria]"
40 | *
41 | * The surrounding brackets can be changed by assigning desired characters to
42 | * SYNONYM_START_DELIMITER or SYNONYM_END_DELIMITER respectively.
43 | *
44 | * If more synonyms are present, they are delimited by SYNONYMS_DELIMITER, which
45 | * is by default ';'.
46 | *
47 | * If it is desired to surround the resulting synonym with prefix/suffix, set
48 | * SYNONYMS_PREFIX or SYNONYMS_SUFIX.
49 | *
50 | * @author Michal Samek, samek.michal @ gmail.com
51 | *
52 | */
53 |
54 | public class InlineAnnotationFilter extends TokenFilter {
55 | public static String SYNONYM_TOKEN_TYPE = "";
56 |
57 | public static String SYNONYM_START_DELIMITER = "[";
58 | public static String SYNONYM_END_DELIMITER = "]";
59 | public static String SYNONYMS_DELIMITER = ";";
60 | public static String SYNONYM_PREFIX = "[";
61 | public static String SYNONYM_SUFFIX = "]";
62 |
63 | private Stack synonymStack;
64 | private AttributeSource.State current;
65 | private final CharTermAttribute termAtt;
66 | private final PositionIncrementAttribute posIncrAtt;
67 | private final TypeAttribute typeAtt;
68 |
69 |
70 | public InlineAnnotationFilter(TokenStream input) {
71 | super(input);
72 | synonymStack = new Stack();
73 | this.termAtt = addAttribute(CharTermAttribute.class);
74 | this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
75 | this.typeAtt = addAttribute(TypeAttribute.class);
76 | }
77 |
78 | @Override
79 | public boolean incrementToken() throws IOException {
80 | if (synonymStack.size() > 0) {
81 | popAliasFromStack();
82 | return true;
83 | }
84 |
85 | if (!input.incrementToken()) {
86 | return false;
87 | }
88 |
89 | if (addAliasesToStack()) {
90 | if (termAtt.length() == 0)
91 | {
92 | popAliasFromStack();
93 | }
94 | }
95 |
96 | current = captureState();
97 | return true;
98 | }
99 |
100 | private void popAliasFromStack()
101 | {
102 | String syn = SYNONYM_PREFIX + synonymStack.pop() + SYNONYM_SUFFIX;
103 | restoreState(current);
104 | termAtt.copyBuffer(syn.toCharArray(), 0, syn.length());
105 | typeAtt.setType(SYNONYM_TOKEN_TYPE);
106 | posIncrAtt.setPositionIncrement(0);
107 | }
108 |
109 |
110 | /**
111 | * Checks whether current token has synonyms appended. If it has, then they
112 | * are pushed on the synonymStack.
113 | *
114 | * @return true if synonyms were found, otherwise false
115 | */
116 | private boolean addAliasesToStack() {
117 | String buffer = termAtt.toString();
118 | String synonyms = null;
119 | int length = buffer.length();
120 |
121 | searchingLoop:
122 | for (int i = 0; i < length; i++) {
123 | if (buffer.startsWith(SYNONYM_START_DELIMITER,i)) {
124 |
125 | // It might not be necessary to search for closing delimiter
126 | int synonyms_start = i + SYNONYM_START_DELIMITER.length();
127 | for (int j = synonyms_start; j < length; j++) {
128 | if (buffer.startsWith(SYNONYM_END_DELIMITER,j)) {
129 | synonyms = buffer.substring(synonyms_start, j);
130 | termAtt.setLength(i);
131 | break searchingLoop;
132 | }
133 | }
134 | }
135 | }
136 |
137 | // No synonyms have been found
138 | if (synonyms == null) {
139 | return false;
140 | }
141 |
142 |
143 | int beginIndex = 0;
144 | int endIndex = -1;
145 | while ((endIndex = synonyms.indexOf(SYNONYMS_DELIMITER, beginIndex)) != -1) {
146 | synonymStack.push(synonyms.substring(beginIndex, endIndex).trim());
147 | beginIndex = endIndex+SYNONYMS_DELIMITER.length();
148 | }
149 |
150 | // Single synonym, which is not ended by SYNONYMS_DELIMITER, eq [artist]
151 | // Last synonym, which is not ended by SYNONYMS_DELIMITER, eq [city;Austria]
152 | // For [city;Austria;] the beginIndex will be set to index, that is equal to the string length
153 | if (beginIndex < synonyms.length() && synonyms.length() > 0) {
154 | synonymStack.push(synonyms.substring(beginIndex).trim());
155 | }
156 | return true;
157 | }
158 |
159 |
160 | /**
161 | * Process settings passed by ElasticSearch during initialization.
162 | * Recognised settings are:
163 | * start - start delimiter for inline annotation
164 | * end - end delimiter for inline annotation
165 | * prefix - string to be prepended to synonym, that is created from inline annotation
166 | * suffix - string to be apended to synonym, that is created from inline annotation
167 | * token-type - token type of synonym
168 | * delimiter - delimiter for multiple inline annotations
169 | * @param settings
170 | * @param name - logical name of the analyzer
171 | */
172 | public static void settings(Settings settings, String name) {
173 | String start_delim, end_delim, syn_prefix, syn_suffix, delimiter, token_type;
174 | start_delim = settings.get("start");
175 | end_delim = settings.get("end");
176 | syn_prefix = settings.get("prefix");
177 | syn_suffix = settings.get("suffix");
178 | delimiter = settings.get("delimiter");
179 | token_type = settings.get("token-type");
180 |
181 |
182 | if (start_delim != null) {
183 | if (start_delim.length() == 0) {
184 | throw new ElasticSearchIllegalArgumentException(
185 | "Analyzer " + name + " has invalid settings: start " +
186 | "delimiter cannot be empty string");
187 | }
188 | InlineAnnotationFilter.SYNONYM_START_DELIMITER = start_delim;
189 | }
190 |
191 | if (end_delim != null) {
192 | if (end_delim.length() == 0) {
193 | throw new ElasticSearchIllegalArgumentException(
194 | "Analyzer " + name + " has invalid settings: end " +
195 | "delimiter cannot be empty string");
196 | }
197 | InlineAnnotationFilter.SYNONYM_END_DELIMITER = end_delim;
198 | }
199 |
200 | if (syn_prefix != null) {
201 | InlineAnnotationFilter.SYNONYM_PREFIX = syn_prefix;
202 | }
203 |
204 | if (syn_suffix != null) {
205 | InlineAnnotationFilter.SYNONYM_SUFFIX = syn_suffix;
206 | }
207 |
208 | if (delimiter != null) {
209 | if (delimiter.length() == 0) {
210 | throw new ElasticSearchIllegalArgumentException(
211 | "Analyzer " + name + " has invalid settings: " +
212 | "delimiter cannot be empty string");
213 | }
214 | InlineAnnotationFilter.SYNONYMS_DELIMITER = delimiter;
215 | }
216 |
217 | if (token_type != null) {
218 | InlineAnnotationFilter.SYNONYM_TOKEN_TYPE = token_type;
219 | }
220 | }
221 | }
222 |
--------------------------------------------------------------------------------
/src/main/resources/es-plugin.properties:
--------------------------------------------------------------------------------
1 | plugin=org.elasticsearch.plugin.analysis.annotation.AnnotationAnalysisPlugin
--------------------------------------------------------------------------------