├── licenses ├── commons-codec-1.11.jar.sha1 ├── lucene-analyzers-phonetic-8.4.0.jar.sha1 ├── commons-codec-NOTICE.txt ├── lucene-NOTICE.txt ├── commons-codec-LICENSE.txt └── lucene-LICENSE.txt ├── settings.gradle ├── versions.properties ├── dist ├── keyboard-layout-7.6.0.zip ├── keyboard-layout-7.6.1.zip └── keyboard-layout-7.6.2.zip ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── .travis.yml ├── .gitignore ├── src ├── test │ ├── resources │ │ └── rest-api-spec │ │ │ └── test │ │ │ └── keyboard_layout │ │ │ ├── 5_plugin_version.yml │ │ │ ├── 90_switch_suggester_ukrainian_language.yml │ │ │ ├── 95_switch_suggester_belarusian_language.yml │ │ │ ├── 100_switch_suggester_empty.yml │ │ │ ├── 70_switch_suggester_option_min_freq.yml │ │ │ ├── 80_switch_suggester_option_max_freq.yml │ │ │ ├── 20_switch_suggester_defaults.yml │ │ │ ├── 40_switch_suggester_option_lowercase_token.yml │ │ │ ├── 50_switch_suggester_option_preserve_case.yml │ │ │ ├── 30_switch_suggester_option_analyzer.yml │ │ │ └── 60_switch_suggester_option_add_original.yml │ └── java │ │ ├── org │ │ └── elasticsearch │ │ │ └── plugin │ │ │ └── keyboard │ │ │ └── RussianKeyboardLayoutRestIT.java │ │ └── com │ │ └── github │ │ └── papahigh │ │ └── keyboardswitcher │ │ ├── KeyboardSwitcherProviderTests.java │ │ ├── RussianKeyboardSwitcherTests.java │ │ ├── UkrainianKeyboardSwitcherTests.java │ │ └── BelarusianKeyboardSwitcherTests.java └── main │ └── java │ ├── com │ └── github │ │ └── papahigh │ │ └── keyboardswitcher │ │ ├── KeyboardSwitcherProvider.java │ │ ├── Languages.java │ │ ├── RussianKeyboardSwitcher.java │ │ ├── BelarusianKeyboardSwitcher.java │ │ ├── UkrainianKeyboardSwitcher.java │ │ └── KeyboardSwitcher.java │ └── org │ └── elasticsearch │ ├── index │ └── analysis │ │ └── keyboard │ │ ├── KeyboardLayoutTokenizerFactory.java │ │ └── KeyboardAnalyzerProvider.java │ ├── search │ └── suggest │ │ └── keyboard │ │ ├── KeyboardLayoutSuggestionContext.java │ │ ├── 
KeyboardLayoutSuggestion.java │ │ ├── KeyboardLayoutSuggester.java │ │ └── KeyboardLayoutSuggestionBuilder.java │ └── plugin │ └── KeyboardLayoutPlugin.java ├── NOTICE.txt ├── docker-compose.yml ├── HEADER.txt ├── releases.asciidoc ├── gradlew.bat ├── gradlew ├── README.asciidoc └── LICENSE.txt /licenses/commons-codec-1.11.jar.sha1: -------------------------------------------------------------------------------- 1 | 3acb4705652e16236558f0f4f2192cc33c3bd189 -------------------------------------------------------------------------------- /licenses/lucene-analyzers-phonetic-8.4.0.jar.sha1: -------------------------------------------------------------------------------- 1 | e47cb4efd4e9e071e0563dd44f1759ea1e712ca7 -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'Russian Keyboard Layout Suggestions' 2 | include ':rest-api-spec' -------------------------------------------------------------------------------- /versions.properties: -------------------------------------------------------------------------------- 1 | # main 2 | elasticsearch = 7.6.0 3 | lucene = 8.4.0 4 | commonscodec = 1.11 -------------------------------------------------------------------------------- /dist/keyboard-layout-7.6.0.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/papahigh/elasticsearch-keyboard-layout/HEAD/dist/keyboard-layout-7.6.0.zip -------------------------------------------------------------------------------- /dist/keyboard-layout-7.6.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/papahigh/elasticsearch-keyboard-layout/HEAD/dist/keyboard-layout-7.6.1.zip -------------------------------------------------------------------------------- /dist/keyboard-layout-7.6.2.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/papahigh/elasticsearch-keyboard-layout/HEAD/dist/keyboard-layout-7.6.2.zip -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/papahigh/elasticsearch-keyboard-layout/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | jdk: openjdk-13 3 | sudo: false 4 | before_install: chmod +x gradlew 5 | 6 | script: 7 | - ./gradlew build 8 | - ./gradlew check 9 | - ./gradlew codeCoverageReport 10 | 11 | after_success: 12 | - bash <(curl -s https://codecov.io/bash) 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .gradle 2 | /build/ 3 | target/ 4 | out/ 5 | 6 | 7 | mvn-repo 8 | 9 | ### STS ### 10 | .apt_generated 11 | .classpath 12 | .factorypath 13 | .project 14 | .settings 15 | .springBeans 16 | 17 | ### IntelliJ IDEA ### 18 | **.idea 19 | *.iws 20 | **.iml 21 | *.ipr 22 | *.log 23 | 24 | ### NetBeans ### 25 | nbproject/private/ 26 | build/ 27 | nbbuild/ 28 | nbdist/ 29 | .nb-gradle/ 
-------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/5_plugin_version.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Plugin Version": 2 | 3 | - skip: 4 | reason: "contains is a newly added assertion" 5 | features: contains 6 | - do: 7 | cluster.state: {} 8 | - set: { master_node: master } 9 | - do: 10 | nodes.info: {} 11 | - contains: { nodes.$master.plugins: { name: keyboard-layout } } 12 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Elasticsearch plugin for keyboard layout suggestions 2 | Copyright 2019 Nikolay Papakha 3 | 4 | Released under version 2.0 of the Apache Licence 5 | Repository: https://github.com/papahigh/elasticsearch-keyboard-layout 6 | 7 | --- 8 | This product includes software developed by 9 | The Apache Software Foundation (http://www.apache.org/). 
10 | 11 | Apache Lucene 12 | Copyright 2014 The Apache Software Foundation 13 | 14 | Apache Commons Codec 15 | Copyright 2002-2014 The Apache Software Foundation -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | # Environment for elasticsearch 7.6.x 4 | services: 5 | # run tests 6 | tests: 7 | image: openjdk:13-oracle 8 | volumes: 9 | - ./:/code 10 | environment: 11 | - JAVA13_HOME=/usr/java/openjdk-13 12 | working_dir: /code 13 | command: 14 | ./gradlew check 15 | 16 | # run assemble 17 | assemble: 18 | image: openjdk:13-oracle 19 | volumes: 20 | - ./:/code 21 | environment: 22 | - JAVA13_HOME=/usr/java/openjdk-13 23 | working_dir: /code 24 | command: 25 | ./gradlew assemble -------------------------------------------------------------------------------- /HEADER.txt: -------------------------------------------------------------------------------- 1 | Copyright ${year} ${developer} 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
-------------------------------------------------------------------------------- /licenses/commons-codec-NOTICE.txt: -------------------------------------------------------------------------------- 1 | Apache Commons Codec 2 | Copyright 2002-2014 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | 7 | src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java 8 | contains test data from http://aspell.net/test/orig/batch0.tab. 9 | Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) 10 | 11 | =============================================================================== 12 | 13 | The content of package org.apache.commons.codec.language.bm has been translated 14 | from the original php source code available at http://stevemorse.org/phoneticinfo.htm 15 | with permission from the original authors. 16 | Original source copyright: 17 | Copyright (c) 2008 Alexander Beider & Stephen P. Morse. -------------------------------------------------------------------------------- /releases.asciidoc: -------------------------------------------------------------------------------- 1 | = Compatible Releases 2 | Nikolay Papakha 3 | 4 | The following table shows the compatible versions of Elasticsearch and Russian keyboard layout suggestions plugin. 5 | Please find an appropriate release which corresponds to your elasticsearch version. 
6 | 7 | .Compatible Releases 8 | [width="100%",cols=">.^3,>.^4,<.^10",options="header"] 9 | |============================================== 10 | | Elasticsearch version | Plugin version| URL 11 | | 7.6.0 | 7.6.0 | https://github.com/papahigh/elasticsearch-keyboard-layout/raw/7.6.0/dist/keyboard-layout-7.6.0.zip 12 | | 7.3.0 | 7.3.0 | https://github.com/papahigh/elasticsearch-keyboard-layout/raw/7.3.0/dist/keyboard-layout-7.3.0.zip 13 | | 7.2.0 | 7.2.0 | https://github.com/papahigh/elasticsearch-keyboard-layout/raw/7.2.0/dist/keyboard-layout-7.2.0.zip 14 | | 7.1.0 | 7.1.0 | https://github.com/papahigh/elasticsearch-keyboard-layout/raw/7.1.0/dist/keyboard-layout-7.1.0.zip 15 | | 7.0.1 | 7.0.1 | https://github.com/papahigh/elasticsearch-keyboard-layout/raw/7.0.1/dist/keyboard-layout-7.0.1.zip 16 | | 7.0.0 | 7.0.0 | https://github.com/papahigh/elasticsearch-keyboard-layout/raw/master/dist/keyboard-layout-7.0.0.zip 17 | |============================================== 18 | 19 | == Requirements 20 | 21 | - adopt-openjdk 13.0.2 22 | 23 | See elasticsearch repository for details -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/plugin/keyboard/RussianKeyboardLayoutRestIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.elasticsearch.plugin.keyboard; 17 | 18 | import com.carrotsearch.randomizedtesting.annotations.Name; 19 | import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; 20 | import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; 21 | import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; 22 | 23 | 24 | public class RussianKeyboardLayoutRestIT extends ESClientYamlSuiteTestCase { 25 | 26 | public RussianKeyboardLayoutRestIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { 27 | super(testCandidate); 28 | } 29 | 30 | @ParametersFactory 31 | public static Iterable parameters() throws Exception { 32 | return ESClientYamlSuiteTestCase.createParameters(); 33 | } 34 | } -------------------------------------------------------------------------------- /src/main/java/com/github/papahigh/keyboardswitcher/KeyboardSwitcherProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | import java.util.EnumMap; 19 | import java.util.Locale; 20 | 21 | public class KeyboardSwitcherProvider { 22 | 23 | private static final EnumMap cache = new EnumMap<>(Languages.class); 24 | 25 | public static KeyboardSwitcher provide(String language) { 26 | if (language == null) { 27 | throw new IllegalArgumentException("No language was provided"); 28 | } 29 | try { 30 | Languages lang = Languages.valueOf(language.toUpperCase(Locale.ROOT)); 31 | return cache.computeIfAbsent(lang, Languages::newInstance); 32 | } catch (IllegalArgumentException e) { 33 | throw new IllegalArgumentException("Unknown language: " + language, e); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/keyboard/KeyboardLayoutTokenizerFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.elasticsearch.index.analysis.keyboard; 17 | 18 | import org.apache.lucene.analysis.Tokenizer; 19 | import org.apache.lucene.analysis.core.WhitespaceTokenizer; 20 | import org.elasticsearch.common.settings.Settings; 21 | import org.elasticsearch.env.Environment; 22 | import org.elasticsearch.index.IndexSettings; 23 | import org.elasticsearch.index.analysis.AbstractTokenizerFactory; 24 | 25 | public class KeyboardLayoutTokenizerFactory extends AbstractTokenizerFactory { 26 | 27 | public KeyboardLayoutTokenizerFactory(IndexSettings indexSettings, 28 | Environment env, String name, Settings settings) { 29 | super(indexSettings, settings, name); 30 | } 31 | 32 | @Override 33 | public Tokenizer create() { 34 | return new WhitespaceTokenizer(); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/index/analysis/keyboard/KeyboardAnalyzerProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.elasticsearch.index.analysis.keyboard; 17 | 18 | import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 19 | import org.elasticsearch.common.settings.Settings; 20 | import org.elasticsearch.env.Environment; 21 | import org.elasticsearch.index.IndexSettings; 22 | import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; 23 | 24 | 25 | public class KeyboardAnalyzerProvider extends AbstractIndexAnalyzerProvider { 26 | 27 | private final WhitespaceAnalyzer analyzer; 28 | 29 | public KeyboardAnalyzerProvider(IndexSettings indexSettings, Environment env, 30 | String name, Settings settings) { 31 | super(indexSettings, name, settings); 32 | 33 | analyzer = new WhitespaceAnalyzer(); 34 | analyzer.setVersion(version); 35 | } 36 | 37 | @Override 38 | public WhitespaceAnalyzer get() { 39 | return this.analyzer; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/github/papahigh/keyboardswitcher/Languages.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | 19 | /** 20 | * Represents an enumeration of all supported keyboard layouts 21 | * and their mapping to the implementation class. 
22 | */ 23 | public enum Languages { 24 | 25 | RUSSIAN("RussianKeyboardSwitcher"), 26 | 27 | UKRAINIAN("UkrainianKeyboardSwitcher"), 28 | 29 | BELARUSIAN("BelarusianKeyboardSwitcher"); 30 | 31 | 32 | private final String clazzName; 33 | 34 | Languages(String clazzName) { 35 | this.clazzName = clazzName; 36 | } 37 | 38 | public KeyboardSwitcher newInstance() { 39 | try { 40 | Class switcherClass = Class.forName("com.github.papahigh.keyboardswitcher." 41 | + clazzName).asSubclass(KeyboardSwitcher.class); 42 | return switcherClass.getConstructor().newInstance(); 43 | } catch (Exception e) { 44 | throw new IllegalArgumentException(e); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/github/papahigh/keyboardswitcher/RussianKeyboardSwitcher.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | 19 | import java.util.Objects; 20 | 21 | /** 22 | * KeyboardSwitcher for Russian/English keyboard layout 23 | */ 24 | public class RussianKeyboardSwitcher extends KeyboardSwitcher { 25 | 26 | private static final char[] charMappings; 27 | 28 | @Override 29 | protected char[] getCharMappings() { 30 | return charMappings; 31 | } 32 | 33 | @Override 34 | public int hashCode() { 35 | return Objects.hashCode(RussianKeyboardSwitcher.class); 36 | } 37 | 38 | @Override 39 | public boolean equals(Object obj) { 40 | return obj instanceof RussianKeyboardSwitcher; 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return "RussianKeyboardSwitcher{}"; 46 | } 47 | 48 | static { 49 | 50 | charMappings = new char['ё' + 1]; 51 | 52 | String eng = "qwertyuiop[]QWERTYUIOP{}asdfghjkl;'\\ASDFGHJKL:\"|zxcvbnm,.ZXCVBNM<>"; 53 | String rus = "йцукенгшщзхъЙЦУКЕНГШЩЗХЪфывапролджэёФЫВАПРОЛДЖЭЁячсмитьбюЯЧСМИТЬБЮ"; 54 | 55 | for (int i = 0; i < eng.length(); i++) { 56 | charMappings[eng.charAt(i)] = rus.charAt(i); 57 | charMappings[rus.charAt(i)] = eng.charAt(i); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/github/papahigh/keyboardswitcher/BelarusianKeyboardSwitcher.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | 19 | import java.util.Objects; 20 | 21 | /** 22 | * KeyboardSwitcher for Belarusian/English keyboard layout 23 | */ 24 | public class BelarusianKeyboardSwitcher extends KeyboardSwitcher { 25 | 26 | private static final char[] charMappings; 27 | 28 | @Override 29 | public char[] getCharMappings() { 30 | return charMappings; 31 | } 32 | 33 | @Override 34 | public int hashCode() { 35 | return Objects.hashCode(RussianKeyboardSwitcher.class); 36 | } 37 | 38 | @Override 39 | public boolean equals(Object obj) { 40 | return obj instanceof BelarusianKeyboardSwitcher; 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return "BelarusianKeyboardSwitcher{}"; 46 | } 47 | 48 | 49 | static { 50 | 51 | charMappings = new char['ў' + 1]; 52 | 53 | String eng = "qwertyuiop[QWERTYUIOP{asdfghjkl;'ASDFGHJKL:\"zxcvbnm,.ZXCVBNM<>"; 54 | String by = "йцукенгшўзхЙЦУКЕНГШЎЗХфывапролджэФЫВАПРОЛДЖЭячсмітьбюЯЧСМІТЬБЮ"; 55 | 56 | for (int i = 0; i < eng.length(); i++) { 57 | charMappings[eng.charAt(i)] = by.charAt(i); 58 | charMappings[by.charAt(i)] = eng.charAt(i); 59 | } 60 | 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/github/papahigh/keyboardswitcher/UkrainianKeyboardSwitcher.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | import java.util.Objects; 19 | 20 | /** 21 | * KeyboardSwitcher for Ukrainian/English keyboard layout 22 | */ 23 | public class UkrainianKeyboardSwitcher extends KeyboardSwitcher { 24 | 25 | private static final char[] charMappings; 26 | 27 | @Override 28 | protected char[] getCharMappings() { 29 | return charMappings; 30 | } 31 | 32 | @Override 33 | public int hashCode() { 34 | return Objects.hashCode(RussianKeyboardSwitcher.class); 35 | } 36 | 37 | @Override 38 | public boolean equals(Object obj) { 39 | return obj instanceof UkrainianKeyboardSwitcher; 40 | } 41 | 42 | @Override 43 | public String toString() { 44 | return "UkrainianKeyboardSwitcher{}"; 45 | } 46 | 47 | static { 48 | 49 | charMappings = new char['ґ' + 1]; 50 | 51 | String eng = "qwertyuiop[]QWERTYUIOP{}asdfghjkl;'ASDFGHJKL:\"\\zxcvbnm,.|ZXCVBNM<>"; 52 | String ukr = "йцукенгшщзхїЙЦУКЕНГШЩЗХЇфівапролджєФІВАПРОЛДЖЄґячсмитьбюҐЯЧСМИТЬБЮ"; 53 | 54 | for (int i = 0; i < eng.length(); i++) { 55 | charMappings[eng.charAt(i)] = ukr.charAt(i); 56 | charMappings[ukr.charAt(i)] = eng.charAt(i); 57 | } 58 | 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/search/suggest/keyboard/KeyboardLayoutSuggestionContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | 
* you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.elasticsearch.search.suggest.keyboard; 17 | 18 | import com.github.papahigh.keyboardswitcher.KeyboardSwitcher; 19 | import org.elasticsearch.index.query.QueryShardContext; 20 | import org.elasticsearch.search.suggest.SuggestionSearchContext; 21 | 22 | final class KeyboardLayoutSuggestionContext extends SuggestionSearchContext.SuggestionContext { 23 | 24 | final KeyboardSwitcher switcher; 25 | 26 | final double minFreq; 27 | final double maxFreq; 28 | final boolean lowercaseToken; 29 | final boolean addOriginal; 30 | final boolean preserveCase; 31 | 32 | KeyboardLayoutSuggestionContext(QueryShardContext shardContext, KeyboardSwitcher switcher, 33 | double minFreq, double maxFreq, boolean lowercaseToken, boolean preserveCase, boolean addOriginal) { 34 | super(KeyboardLayoutSuggester.INSTANCE, shardContext); 35 | this.switcher = switcher; 36 | this.minFreq = minFreq; 37 | this.maxFreq = maxFreq; 38 | this.lowercaseToken = lowercaseToken; 39 | this.preserveCase = preserveCase; 40 | this.addOriginal = addOriginal; 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return "KeyboardSwitchSuggestionContext[" + 46 | ", switcher=" + switcher + 47 | ", minFreq=" + minFreq + 48 | ", maxFreq=" + maxFreq + 49 | ", lowercaseToken=" + lowercaseToken + 50 | ", addOriginal=" + addOriginal + 51 | ", preserveCase=" + preserveCase + 52 | ", context=" + super.toString() + 53 | "]"; 54 | } 55 | 56 | } 57 | 
-------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/plugin/KeyboardLayoutPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.elasticsearch.plugin; 17 | 18 | import org.apache.lucene.analysis.Analyzer; 19 | import org.elasticsearch.index.analysis.AnalyzerProvider; 20 | import org.elasticsearch.index.analysis.TokenizerFactory; 21 | import org.elasticsearch.index.analysis.keyboard.KeyboardAnalyzerProvider; 22 | import org.elasticsearch.index.analysis.keyboard.KeyboardLayoutTokenizerFactory; 23 | import org.elasticsearch.indices.analysis.AnalysisModule; 24 | import org.elasticsearch.plugins.AnalysisPlugin; 25 | import org.elasticsearch.plugins.Plugin; 26 | import org.elasticsearch.plugins.SearchPlugin; 27 | import org.elasticsearch.search.suggest.keyboard.KeyboardLayoutSuggestion; 28 | import org.elasticsearch.search.suggest.keyboard.KeyboardLayoutSuggestionBuilder; 29 | 30 | import java.util.Collections; 31 | import java.util.List; 32 | import java.util.Map; 33 | 34 | import static java.util.Collections.singletonMap; 35 | 36 | 37 | public class KeyboardLayoutPlugin extends Plugin implements SearchPlugin, AnalysisPlugin { 38 | 39 | @Override 40 | public List> getSuggesters() { 41 | return Collections.singletonList( 
42 | new SearchPlugin.SuggesterSpec<>( 43 | KeyboardLayoutSuggestionBuilder.SUGGESTION_NAME, 44 | KeyboardLayoutSuggestionBuilder::new, 45 | KeyboardLayoutSuggestionBuilder::fromXContent, 46 | KeyboardLayoutSuggestion::new 47 | ) 48 | ); 49 | } 50 | 51 | @Override 52 | public Map>> getAnalyzers() { 53 | return singletonMap("keyboard_analyzer", KeyboardAnalyzerProvider::new); 54 | } 55 | 56 | @Override 57 | public Map> getTokenizers() { 58 | return singletonMap("keyboard_tokenizer", KeyboardLayoutTokenizerFactory::new); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/github/papahigh/keyboardswitcher/KeyboardSwitcher.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | import org.apache.commons.codec.EncoderException; 19 | import org.apache.commons.codec.StringEncoder; 20 | 21 | import java.nio.charset.Charset; 22 | 23 | 24 | /** 25 | * Encodes an input string into its switched variant 26 | * according to a keyboard layout. 
27 | */ 28 | public abstract class KeyboardSwitcher implements StringEncoder { 29 | 30 | static String EMPTY_STRING = new String("".getBytes(Charset.defaultCharset()), Charset.defaultCharset()); 31 | 32 | protected abstract char[] getCharMappings(); 33 | 34 | public char[] switchLayout(char[] source, int offset, int length, boolean replace) { 35 | if (source == null || source.length == 0) { 36 | return new char[0]; 37 | } 38 | char[] switched = replace ? source : new char[length]; 39 | char[] charMappings = getCharMappings(); 40 | for (int i = offset; i < offset + length; i++) { 41 | final char curr = source[i]; 42 | if (curr > 0 && curr < charMappings.length) { 43 | char mapped = charMappings[curr]; 44 | switched[i] = mapped != Character.MIN_VALUE ? mapped : curr; 45 | } 46 | } 47 | return switched; 48 | } 49 | 50 | @Override 51 | public String encode(String source) { 52 | int length; 53 | if (source == null || (length = source.length()) == 0) { 54 | return EMPTY_STRING; 55 | } 56 | char[] chars = source.toCharArray(); 57 | return new String(switchLayout(chars, 0, length, true)); 58 | } 59 | 60 | @Override 61 | public Object encode(Object source) throws EncoderException { 62 | if (source instanceof String) { 63 | return encode((String) source); 64 | } 65 | if (source == null) { 66 | return EMPTY_STRING; 67 | } 68 | throw new EncoderException("Unsupported parameter type supplied to [KeyboardSwitcher]"); 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/90_switch_suggester_ukrainian_language.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [ukrainian language]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 | index: 
suggester_lang_ukr 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: standard 23 | filter: ["lowercase"] 24 | mappings: 25 | properties: 26 | content: 27 | type: text 28 | analyzer: "my_analyzer" 29 | 30 | - do: 31 | bulk: 32 | index: suggester_lang_ukr 33 | refresh: true 34 | body: 35 | - '{"index": {"_index": "suggester_lang_ukr", "_id": "1"}}' 36 | - '{ "content": "Новини - Зернові технології" }' 37 | - '{"index": {"_index": "suggester_lang_ukr", "_id": "2"}}' 38 | - '{ "content": "Компанія “SUPER-Нові технології 1700” пропонує тільки найкраще обладнання від виробників " }' 39 | - '{"index": {"_index": "suggester_lang_ukr", "_id": "3"}}' 40 | - '{ "content": "Чемодан Xiaomi" }' 41 | - '{"index": {"_index": "suggester_lang_ukr", "_id": "4"}}' 42 | - '{ "content": "Мобільні технології це портал про ґаджети" }' 43 | 44 | - do: 45 | indices.refresh: 46 | index: "_all" 47 | 48 | - do: 49 | search: 50 | size: 0 51 | index: suggester_lang_ukr 52 | body: 53 | suggest: 54 | text: 'nt[yjkjus]' 55 | keyboard_layout_default: 56 | keyboard_layout: 57 | field: content 58 | language: ukrainian 59 | lowercase_token: true 60 | add_original: true 61 | 62 | - length: { suggest.keyboard_layout_default: 1 } 63 | - match: { suggest.keyboard_layout_default.0.text: 'nt[yjkjus]' } 64 | - match: { suggest.keyboard_layout_default.0.offset: 0 } 65 | - match: { suggest.keyboard_layout_default.0.length: 10 } 66 | - length: { suggest.keyboard_layout_default.0.options: 2 } 67 | - match: { suggest.keyboard_layout_default.0.options.0.text: 'технології' } 68 | - match: { suggest.keyboard_layout_default.0.options.0.freq: 3 } 69 | - match: { suggest.keyboard_layout_default.0.options.0.switch: true } 70 | - match: { suggest.keyboard_layout_default.0.options.1.text: 'nt[yjkjus]' } 71 | - match: { suggest.keyboard_layout_default.0.options.1.freq: 0 } 72 | - match: { 
suggest.keyboard_layout_default.0.options.1.switch: false } 73 | -------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/95_switch_suggester_belarusian_language.yml: --------------------------------------------------------------------------------
# Every step below targets the same dedicated index, so the suggester runs
# against the analyzer and shard settings configured in indices.create.
"Keyboard Layout Suggester [belarusian language]":
  - do:
      cluster.health:
        wait_for_nodes: 2

  - is_true: cluster_name
  - is_false: timed_out
  - gte: { number_of_nodes: 2 }
  - gte: { number_of_data_nodes: 2 }

  - do:
      indices.create:
        index: suggester_lang_bel
        body:
          settings:
            number_of_shards: 2
            number_of_replicas: 0
            index:
              analysis:
                analyzer:
                  my_analyzer:
                    tokenizer: standard
                    filter: ["lowercase"]
          mappings:
            properties:
              content:
                type: text
                analyzer: "my_analyzer"

  - do:
      bulk:
        index: suggester_lang_bel
        refresh: true
        body:
          - '{"index": {"_index": "suggester_lang_bel", "_id": "1"}}'
          - '{ "content": "За апошнія некалькі дзён міліцыя выявіла два выпадкі сексуальнага гвалту над непаўналетнімі." }'
          - '{"index": {"_index": "suggester_lang_bel", "_id": "2"}}'
          - '{ "content": "Праз некалькі дзён пацярпелая выявіла, што пералічаныя ..." }'
          - '{"index": {"_index": "suggester_lang_bel", "_id": "3"}}'
          - '{ "content": "Было некалькі выпадкаў, калі спачатку кіраўніцтва БелАЭС" }'
          - '{"index": {"_index": "suggester_lang_bel", "_id": "4"}}'
          - '{ "content": "І мы ўжо адсачылі некалькі такіх выпадкаў, паведамілі пра іх: пра тое, што некаторыя службы, не буду называць якія, задавалі пытаньні" }'

  - do:
      indices.refresh:
        index: "_all"

  - do:
      search:
        size: 0
        index: suggester_lang_bel
        body:
          suggest:
            text: 'dsgflrfo'
            keyboard_layout_default:
              keyboard_layout:
                field: content
                language: belarusian
                lowercase_token: true
                add_original: true

  - length: { suggest.keyboard_layout_default: 1 }
  - match: { suggest.keyboard_layout_default.0.text: 'dsgflrfo' }
  - match: { suggest.keyboard_layout_default.0.offset: 0 }
  - match: { suggest.keyboard_layout_default.0.length: 8 }
  - length: { suggest.keyboard_layout_default.0.options: 2 }
  - match: { suggest.keyboard_layout_default.0.options.0.text: 'выпадкаў' }
  - match: { suggest.keyboard_layout_default.0.options.0.freq: 2 }
  - match: { suggest.keyboard_layout_default.0.options.0.switch: true }
  - match: { suggest.keyboard_layout_default.0.options.1.text: 'dsgflrfo' }
  - match: { suggest.keyboard_layout_default.0.options.1.freq: 0 }
  - match: { suggest.keyboard_layout_default.0.options.1.switch: false }
-------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/100_switch_suggester_empty.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [empty options]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 |
index: suggester_defaults 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: standard 23 | mappings: 24 | properties: 25 | content: 26 | type: text 27 | analyzer: "my_analyzer" 28 | 29 | - do: 30 | bulk: 31 | index: suggester_defaults 32 | refresh: true 33 | body: 34 | - '{"index": {"_index": "suggester_defaults", "_id": "1"}}' 35 | - '{ "content": "Кроссовки женские Nike MD Runner 2" }' 36 | - '{"index": {"_index": "suggester_defaults", "_id": "2"}}' 37 | - '{ "content": "Валенки мужские Nike MD Runner 2" }' 38 | - '{"index": {"_index": "suggester_defaults", "_id": "3"}}' 39 | - '{ "content": "Кроссовки для мальчиков Nike MD Runner 2." }' 40 | - '{"index": {"_index": "suggester_defaults", "_id": "4"}}' 41 | - '{ "content": "Кроссовки мужские Nike Runner 2 Mid Prem." }' 42 | 43 | - do: 44 | indices.refresh: 45 | index: "_all" 46 | 47 | - do: 48 | search: 49 | size: 0 50 | index: suggester_defaults 51 | body: 52 | suggest: 53 | text: 'Nike Кроссовки Runner 2' 54 | keyboard_layout_default: 55 | keyboard_layout: 56 | field: content 57 | language: russian 58 | 59 | - length: { suggest.keyboard_layout_default: 4 } 60 | - match: { suggest.keyboard_layout_default.0.text: 'Nike' } 61 | - match: { suggest.keyboard_layout_default.0.offset: 0 } 62 | - match: { suggest.keyboard_layout_default.0.length: 4 } 63 | - length: { suggest.keyboard_layout_default.0.options: 0 } 64 | - match: { suggest.keyboard_layout_default.1.text: 'Кроссовки' } 65 | - match: { suggest.keyboard_layout_default.1.offset: 5 } 66 | - match: { suggest.keyboard_layout_default.1.length: 9 } 67 | - length: { suggest.keyboard_layout_default.1.options: 0 } 68 | - match: { suggest.keyboard_layout_default.2.text: 'Runner' } 69 | - match: { suggest.keyboard_layout_default.2.offset: 15 } 70 | - match: { suggest.keyboard_layout_default.2.length: 6 } 71 | - length: { suggest.keyboard_layout_default.2.options: 0 } 72 
| - match: { suggest.keyboard_layout_default.3.text: '2' } 73 | - match: { suggest.keyboard_layout_default.3.offset: 22 } 74 | - match: { suggest.keyboard_layout_default.3.length: 1 } 75 | - length: { suggest.keyboard_layout_default.3.options: 0 } 76 | 77 | -------------------------------------------------------------------------------- /src/test/java/com/github/papahigh/keyboardswitcher/KeyboardSwitcherProviderTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | import org.apache.lucene.util.LuceneTestCase; 19 | 20 | import static org.hamcrest.CoreMatchers.containsString; 21 | import static org.hamcrest.CoreMatchers.instanceOf; 22 | 23 | public class KeyboardSwitcherProviderTests extends LuceneTestCase { 24 | 25 | public void testCaseInsensitivity() { 26 | KeyboardSwitcher switcher1 = KeyboardSwitcherProvider.provide("RUSSIAN"); 27 | KeyboardSwitcher switcher2 = KeyboardSwitcherProvider.provide("russian"); 28 | KeyboardSwitcher switcher3 = KeyboardSwitcherProvider.provide("Russian"); 29 | KeyboardSwitcher switcher4 = KeyboardSwitcherProvider.provide("RuSsIaN"); 30 | 31 | assertEquals(switcher1.getClass(), RussianKeyboardSwitcher.class); 32 | assertEquals(switcher2.getClass(), RussianKeyboardSwitcher.class); 33 | assertEquals(switcher3.getClass(), RussianKeyboardSwitcher.class); 34 | assertEquals(switcher4.getClass(), RussianKeyboardSwitcher.class); 35 | } 36 | 37 | public void testRussianSwitcher() { 38 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("russian"); 39 | assertEquals(switcher.getClass(), RussianKeyboardSwitcher.class); 40 | } 41 | 42 | public void testUkrainianSwitcher() { 43 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("ukrainian"); 44 | assertEquals(switcher.getClass(), UkrainianKeyboardSwitcher.class); 45 | } 46 | 47 | public void testBelarusianSwitcher() { 48 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("belarusian"); 49 | assertEquals(switcher.getClass(), BelarusianKeyboardSwitcher.class); 50 | } 51 | 52 | public void testNullLanguage() { 53 | Throwable e = expectThrows(Throwable.class, () -> KeyboardSwitcherProvider.provide((String) null)); 54 | assertThat(e, instanceOf(IllegalArgumentException.class)); 55 | assertThat(e.getMessage(), containsString("No language was provided")); 56 | } 57 | 58 | public void testUnknownLanguage() { 59 | Throwable e = expectThrows(Throwable.class, () -> 
KeyboardSwitcherProvider.provide("QwerTYUioIUYTdfgHJK")); 60 | assertThat(e, instanceOf(IllegalArgumentException.class)); 61 | assertThat(e.getMessage(), containsString("Unknown language: QwerTYUioIUYTdfgHJK")); 62 | } 63 | } -------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/70_switch_suggester_option_min_freq.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [min freq]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 | index: suggester_min_freq 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: standard 23 | filter: ["lowercase"] 24 | mappings: 25 | properties: 26 | content: 27 | type: text 28 | analyzer: "my_analyzer" 29 | 30 | - do: 31 | bulk: 32 | index: suggester_min_freq 33 | refresh: true 34 | body: 35 | - '{"index": {"_index": "suggester_min_freq", "_id": "1"}}' 36 | - '{ "content": "Чемодан на колесах Wings Predator 518-3 Light Green 75 см" }' 37 | - '{"index": {"_index": "suggester_min_freq", "_id": "2"}}' 38 | - '{ "content": "Колесо для чемодана RONCATO" }' 39 | - '{"index": {"_index": "suggester_min_freq", "_id": "3"}}' 40 | - '{ "content": "Чемодан Xiaomi" }' 41 | - '{"index": {"_index": "suggester_min_freq", "_id": "4"}}' 42 | - '{ "content": "Сумка на колесах American Tourister 16G*013 Road Quest *21 Deep Water Blue" }' 43 | - '{"index": {"_index": "suggester_min_freq", "_id": "5"}}' 44 | - '{ "content": "Мороженое Baskin Robbins сливочное миндально-фисташковое 64 г" }' 45 | - '{"index": {"_index": "suggester_min_freq", "_id": "6"}}' 46 | - '{ "content": "Мороженое Baskin Robbins сливочное миндально-фисташковое с миндалем 60 
г" }' 47 | - '{"index": {"_index": "suggester_min_freq", "_id": "7"}}' 48 | - '{ "content": "Мороженое Baskin Robbins сливочное ванильно-черничное с прослойкой черники 600 г" }' 49 | - '{"index": {"_index": "suggester_min_freq", "_id": "8"}}' 50 | - '{ "content": "Apple Mac Pro (MD878)" }' 51 | 52 | - do: 53 | indices.refresh: 54 | index: "_all" 55 | 56 | - do: 57 | search: 58 | size: 0 59 | index: suggester_min_freq 60 | body: 61 | suggest: 62 | text: 'Vjhj;tyjt' 63 | keyboard_layout_default: 64 | keyboard_layout: 65 | field: content 66 | language: russian 67 | lowercase_token: true 68 | preserve_case: true 69 | add_original: true 70 | min_freq: 0.33 71 | 72 | - length: { suggest.keyboard_layout_default: 1 } 73 | - length: { suggest.keyboard_layout_default.0.options: 2 } 74 | 75 | - do: 76 | search: 77 | size: 0 78 | index: suggester_min_freq 79 | body: 80 | suggest: 81 | text: 'Vjhj;tyjt' 82 | keyboard_layout_default: 83 | keyboard_layout: 84 | field: content 85 | language: russian 86 | lowercase_token: true 87 | preserve_case: true 88 | add_original: true 89 | min_freq: 0.55 90 | 91 | - length: { suggest.keyboard_layout_default: 1 } 92 | - length: { suggest.keyboard_layout_default.0.options: 0 } 93 | -------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/80_switch_suggester_option_max_freq.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [max freq]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 | index: suggester_max_freq 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: standard 23 | filter: ["lowercase"] 24 | mappings: 25 | properties: 
26 | content: 27 | type: text 28 | analyzer: "my_analyzer" 29 | 30 | - do: 31 | bulk: 32 | index: suggester_max_freq 33 | refresh: true 34 | body: 35 | - '{"index": {"_index": "suggester_max_freq", "_id": "1"}}' 36 | - '{ "content": "Чемодан на колесах Wings Predator 518-3 Light Green 75 см" }' 37 | - '{"index": {"_index": "suggester_max_freq", "_id": "2"}}' 38 | - '{ "content": "Колесо для чемодана RONCATO" }' 39 | - '{"index": {"_index": "suggester_max_freq", "_id": "3"}}' 40 | - '{ "content": "Чемодан Xiaomi" }' 41 | - '{"index": {"_index": "suggester_max_freq", "_id": "4"}}' 42 | - '{ "content": "Сумка на колесах American Tourister 16G*013 Road Quest *21 Deep Water Blue" }' 43 | - '{"index": {"_index": "suggester_max_freq", "_id": "5"}}' 44 | - '{ "content": "Мороженое Baskin Robbins сливочное миндально-фисташковое 64 г" }' 45 | - '{"index": {"_index": "suggester_max_freq", "_id": "6"}}' 46 | - '{ "content": "Мороженое Baskin Robbins сливочное миндально-фисташковое с миндалем 60 г" }' 47 | - '{"index": {"_index": "suggester_max_freq", "_id": "7"}}' 48 | - '{ "content": "Мороженое Baskin Robbins сливочное ванильно-черничное с прослойкой черники 600 г" }' 49 | - '{"index": {"_index": "suggester_max_freq", "_id": "8"}}' 50 | - '{ "content": "Apple Mac Pro (MD878)" }' 51 | 52 | - do: 53 | indices.refresh: 54 | index: "_all" 55 | 56 | - do: 57 | search: 58 | size: 0 59 | index: suggester_max_freq 60 | body: 61 | suggest: 62 | text: 'Vjhj;tyjt' 63 | keyboard_layout_default: 64 | keyboard_layout: 65 | field: content 66 | language: russian 67 | lowercase_token: true 68 | preserve_case: true 69 | add_original: true 70 | max_freq: 0.34 71 | 72 | - length: { suggest.keyboard_layout_default: 1 } 73 | - length: { suggest.keyboard_layout_default.0.options: 2 } 74 | 75 | 76 | - do: 77 | search: 78 | size: 0 79 | index: suggester_max_freq 80 | body: 81 | suggest: 82 | text: 'Vjhj;tyjt' 83 | keyboard_layout_default: 84 | keyboard_layout: 85 | field: content 86 | language: 
russian 87 | lowercase_token: true 88 | preserve_case: true 89 | add_original: true 90 | max_freq: 0.33 91 | 92 | - length: { suggest.keyboard_layout_default: 1 } 93 | - length: { suggest.keyboard_layout_default.0.options: 0 } 94 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto init 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto init 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :init 68 | @rem Get command-line arguments, handling Windows variants 69 | 70 | if not "%OS%" == "Windows_NT" goto win9xME_args 71 | 72 | :win9xME_args 73 | @rem Slurp the command line arguments. 74 | set CMD_LINE_ARGS= 75 | set _SKIP=2 76 | 77 | :win9xME_args_slurp 78 | if "x%~1" == "x" goto execute 79 | 80 | set CMD_LINE_ARGS=%* 81 | 82 | :execute 83 | @rem Setup the command line 84 | 85 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 86 | 87 | @rem Execute Gradle 88 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 89 | 90 | :end 91 | @rem End local scope for the variables with windows NT shell 92 | if "%ERRORLEVEL%"=="0" goto mainEnd 93 | 94 | :fail 95 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 96 | rem the _cmd.exe /c_ return code! 
97 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 98 | exit /b 1 99 | 100 | :mainEnd 101 | if "%OS%"=="Windows_NT" endlocal 102 | 103 | :omega 104 | -------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/20_switch_suggester_defaults.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [default options]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 | index: suggester_defaults 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: standard 23 | mappings: 24 | properties: 25 | content: 26 | type: text 27 | analyzer: "my_analyzer" 28 | 29 | - do: 30 | bulk: 31 | index: suggester_defaults 32 | refresh: true 33 | body: 34 | - '{"index": {"_index": "suggester_defaults", "_id": "1"}}' 35 | - '{ "content": "Кроссовки женские Nike MD Runner 2" }' 36 | - '{"index": {"_index": "suggester_defaults", "_id": "2"}}' 37 | - '{ "content": "Валенки мужские Nike MD Runner 2" }' 38 | - '{"index": {"_index": "suggester_defaults", "_id": "3"}}' 39 | - '{ "content": "Кроссовки для мальчиков Nike MD Runner 2." }' 40 | - '{"index": {"_index": "suggester_defaults", "_id": "4"}}' 41 | - '{ "content": "Кроссовки мужские Nike Runner 2 Mid Prem." 
}' 42 | 43 | - do: 44 | indices.refresh: 45 | index: "_all" 46 | 47 | - do: 48 | search: 49 | size: 0 50 | index: suggester_defaults 51 | body: 52 | suggest: 53 | text: 'Тшлу Rhjccjdrb Runner 2' 54 | keyboard_layout_default: 55 | keyboard_layout: 56 | field: content 57 | language: russian 58 | 59 | - length: { suggest.keyboard_layout_default: 4 } 60 | # suggest [ Тшлу ] -> [ Nike ] 61 | - match: { suggest.keyboard_layout_default.0.text: 'Тшлу' } 62 | - match: { suggest.keyboard_layout_default.0.offset: 0 } 63 | - match: { suggest.keyboard_layout_default.0.length: 4 } 64 | - length: { suggest.keyboard_layout_default.0.options: 1 } 65 | - match: { suggest.keyboard_layout_default.0.options.0.text: 'Nike' } 66 | - gte: { suggest.keyboard_layout_default.0.options.0.freq: 1 } 67 | - lte: { suggest.keyboard_layout_default.0.options.0.freq: 4 } 68 | - match: { suggest.keyboard_layout_default.0.options.0.switch: true } 69 | # suggest [ Rhjccjdrb ] -> [ Кроссовки ] 70 | - match: { suggest.keyboard_layout_default.1.text: 'Rhjccjdrb' } 71 | - match: { suggest.keyboard_layout_default.1.offset: 5 } 72 | - match: { suggest.keyboard_layout_default.1.length: 9 } 73 | - length: { suggest.keyboard_layout_default.1.options: 1 } 74 | - match: { suggest.keyboard_layout_default.1.options.0.text: 'Кроссовки' } 75 | - gte: { suggest.keyboard_layout_default.1.options.0.freq: 1 } 76 | - lte: { suggest.keyboard_layout_default.1.options.0.freq: 4 } 77 | - match: { suggest.keyboard_layout_default.1.options.0.switch: true } 78 | # suggest [ nothing ] 79 | - match: { suggest.keyboard_layout_default.2.text: 'Runner' } 80 | - match: { suggest.keyboard_layout_default.2.offset: 15 } 81 | - match: { suggest.keyboard_layout_default.2.length: 6 } 82 | - length: { suggest.keyboard_layout_default.2.options: 0 } 83 | # suggest [ nothing ] 84 | - match: { suggest.keyboard_layout_default.3.text: '2' } 85 | - match: { suggest.keyboard_layout_default.3.offset: 22 } 86 | - match: { 
suggest.keyboard_layout_default.3.length: 1 } 87 | - length: { suggest.keyboard_layout_default.3.options: 0 } 88 | 89 | -------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/40_switch_suggester_option_lowercase_token.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [lowercase_token]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 | index: suggester_lowercase_token 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: standard 23 | 24 | mappings: 25 | properties: 26 | content: 27 | type: text 28 | analyzer: "my_analyzer" 29 | 30 | - do: 31 | bulk: 32 | index: suggester_lowercase_token 33 | refresh: true 34 | body: 35 | - '{"index": {"_index": "suggester_lowercase_token", "_id": "1"}}' 36 | - '{ "content": "To be or not to be" }' 37 | - '{"index": {"_index": "suggester_lowercase_token", "_id": "2"}}' 38 | - '{ "content": "мороз и солнце и еще кое-что" }' 39 | - '{"index": {"_index": "suggester_lowercase_token", "_id": "3"}}' 40 | - '{ "content": "Кроссовки для мальчиков и девочек модель B/B-b-b-b" }' 41 | - '{"index": {"_index": "suggester_lowercase_token", "_id": "4"}}' 42 | - '{ "content": "и куда же без B-52" }' 43 | 44 | - do: 45 | indices.refresh: 46 | index: "_all" 47 | 48 | - do: 49 | search: 50 | size: 0 51 | index: suggester_lowercase_token 52 | body: 53 | suggest: 54 | text: 'B Relf ;t ,tp' 55 | keyboard_layout_default: 56 | keyboard_layout: 57 | field: content 58 | language: russian 59 | lowercase_token: true 60 | 61 | - length: { suggest.keyboard_layout_default: 4 } 62 | # suggest [ b ] -> [ и, b ] 63 | - match: { 
suggest.keyboard_layout_default.0.text: 'B' } 64 | - match: { suggest.keyboard_layout_default.0.offset: 0 } 65 | - match: { suggest.keyboard_layout_default.0.length: 1 } 66 | - length: { suggest.keyboard_layout_default.0.options: 1 } 67 | - match: { suggest.keyboard_layout_default.0.options.0.text: 'и' } 68 | - gte: { suggest.keyboard_layout_default.0.options.0.freq: 1 } 69 | - lte: { suggest.keyboard_layout_default.0.options.0.freq: 4 } 70 | - match: { suggest.keyboard_layout_default.0.options.0.switch: true } 71 | # suggest [ relf ] -> [ куда ] 72 | - match: { suggest.keyboard_layout_default.1.text: 'Relf' } 73 | - match: { suggest.keyboard_layout_default.1.offset: 2 } 74 | - match: { suggest.keyboard_layout_default.1.length: 4 } 75 | - length: { suggest.keyboard_layout_default.1.options: 1 } 76 | - match: { suggest.keyboard_layout_default.1.options.0.text: 'куда' } 77 | - gte: { suggest.keyboard_layout_default.1.options.0.freq: 1 } 78 | - lte: { suggest.keyboard_layout_default.1.options.0.freq: 4 } 79 | - match: { suggest.keyboard_layout_default.1.options.0.switch: true } 80 | # suggest [ ;t ] -> [ же ] 81 | - match: { suggest.keyboard_layout_default.2.text: ';t' } 82 | - match: { suggest.keyboard_layout_default.2.offset: 7 } 83 | - match: { suggest.keyboard_layout_default.2.length: 2 } 84 | - length: { suggest.keyboard_layout_default.2.options: 1 } 85 | - match: { suggest.keyboard_layout_default.2.options.0.text: 'же' } 86 | - gte: { suggest.keyboard_layout_default.2.options.0.freq: 1 } 87 | - lt: { suggest.keyboard_layout_default.2.options.0.freq: 2 } 88 | - match: { suggest.keyboard_layout_default.2.options.0.switch: true } 89 | # suggest [ ,tp ] -> [ без ] 90 | - match: { suggest.keyboard_layout_default.3.text: ',tp' } 91 | - match: { suggest.keyboard_layout_default.3.offset: 10 } 92 | - match: { suggest.keyboard_layout_default.3.length: 3 } 93 | - length: { suggest.keyboard_layout_default.3.options: 1 } 94 | - match: { 
suggest.keyboard_layout_default.3.options.0.text: 'без' } 95 | - gte: { suggest.keyboard_layout_default.3.options.0.freq: 1 } 96 | - lt: { suggest.keyboard_layout_default.3.options.0.freq: 2 } 97 | - match: { suggest.keyboard_layout_default.3.options.0.switch: true } 98 | -------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/50_switch_suggester_option_preserve_case.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [preserve_case]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 | index: suggester_preserve_case 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: standard 23 | 24 | mappings: 25 | properties: 26 | content: 27 | type: text 28 | analyzer: "my_analyzer" 29 | 30 | - do: 31 | bulk: 32 | index: suggester_preserve_case 33 | refresh: true 34 | body: 35 | - '{"index": {"_index": "suggester_preserve_case", "_id": "1"}}' 36 | - '{ "content": "To be or not to be" }' 37 | - '{"index": {"_index": "suggester_preserve_case", "_id": "2"}}' 38 | - '{ "content": "мороз и солнце и еще кое-что" }' 39 | - '{"index": {"_index": "suggester_preserve_case", "_id": "3"}}' 40 | - '{ "content": "Кроссовки для мальчиков и девочек модель B/B-b-b-b" }' 41 | - '{"index": {"_index": "suggester_preserve_case", "_id": "4"}}' 42 | - '{ "content": "и куда же без B-52" }' 43 | 44 | - do: 45 | indices.refresh: 46 | index: "_all" 47 | 48 | - do: 49 | search: 50 | size: 0 51 | index: suggester_preserve_case 52 | body: 53 | suggest: 54 | text: 'B Relf ;t ,tp' 55 | keyboard_layout_default: 56 | keyboard_layout: 57 | field: content 58 | language: russian 59 | lowercase_token: 
true 60 | preserve_case: true 61 | 62 | - length: { suggest.keyboard_layout_default: 4 } 63 | # suggest [ b ] -> [ и, b ] 64 | - match: { suggest.keyboard_layout_default.0.text: 'B' } 65 | - match: { suggest.keyboard_layout_default.0.offset: 0 } 66 | - match: { suggest.keyboard_layout_default.0.length: 1 } 67 | - length: { suggest.keyboard_layout_default.0.options: 1 } 68 | - match: { suggest.keyboard_layout_default.0.options.0.text: 'И' } 69 | - gte: { suggest.keyboard_layout_default.0.options.0.freq: 1 } 70 | - lte: { suggest.keyboard_layout_default.0.options.0.freq: 4 } 71 | - match: { suggest.keyboard_layout_default.0.options.0.switch: true } 72 | # suggest [ relf ] -> [ куда ] 73 | - match: { suggest.keyboard_layout_default.1.text: 'Relf' } 74 | - match: { suggest.keyboard_layout_default.1.offset: 2 } 75 | - match: { suggest.keyboard_layout_default.1.length: 4 } 76 | - length: { suggest.keyboard_layout_default.1.options: 1 } 77 | - match: { suggest.keyboard_layout_default.1.options.0.text: 'Куда' } 78 | - gte: { suggest.keyboard_layout_default.1.options.0.freq: 1 } 79 | - lte: { suggest.keyboard_layout_default.1.options.0.freq: 4 } 80 | - match: { suggest.keyboard_layout_default.1.options.0.switch: true } 81 | # suggest [ ;t ] -> [ же ] 82 | - match: { suggest.keyboard_layout_default.2.text: ';t' } 83 | - match: { suggest.keyboard_layout_default.2.offset: 7 } 84 | - match: { suggest.keyboard_layout_default.2.length: 2 } 85 | - length: { suggest.keyboard_layout_default.2.options: 1 } 86 | - match: { suggest.keyboard_layout_default.2.options.0.text: 'же' } 87 | - gte: { suggest.keyboard_layout_default.2.options.0.freq: 1 } 88 | - lt: { suggest.keyboard_layout_default.2.options.0.freq: 2 } 89 | - match: { suggest.keyboard_layout_default.2.options.0.switch: true } 90 | # suggest [ ,tp ] -> [ без ] 91 | - match: { suggest.keyboard_layout_default.3.text: ',tp' } 92 | - match: { suggest.keyboard_layout_default.3.offset: 10 } 93 | - match: { 
suggest.keyboard_layout_default.3.length: 3 } 94 | - length: { suggest.keyboard_layout_default.3.options: 1 } 95 | - match: { suggest.keyboard_layout_default.3.options.0.text: 'без' } 96 | - gte: { suggest.keyboard_layout_default.3.options.0.freq: 1 } 97 | - lt: { suggest.keyboard_layout_default.3.options.0.freq: 2 } 98 | - match: { suggest.keyboard_layout_default.3.options.0.switch: true } 99 | -------------------------------------------------------------------------------- /src/test/java/com/github/papahigh/keyboardswitcher/RussianKeyboardSwitcherTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | import org.apache.commons.codec.EncoderException; 19 | import org.apache.lucene.util.LuceneTestCase; 20 | 21 | import java.util.Arrays; 22 | 23 | import static org.hamcrest.CoreMatchers.containsString; 24 | import static org.hamcrest.CoreMatchers.instanceOf; 25 | 26 | public class RussianKeyboardSwitcherTests extends LuceneTestCase { 27 | 28 | public void testEncodeObject() throws EncoderException { 29 | 30 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("russian"); 31 | 32 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode(null)); 33 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((String) null)); 34 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((Integer) null)); 35 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((Object) null)); 36 | 37 | Throwable e = expectThrows(Throwable.class, () -> switcher.encode(123)); 38 | assertThat(e, instanceOf(EncoderException.class)); 39 | assertThat(e.getMessage(), containsString("Unsupported parameter type supplied to [KeyboardSwitcher]")); 40 | 41 | e = expectThrows(Throwable.class, () -> switcher.encode(switcher)); 42 | assertThat(e, instanceOf(EncoderException.class)); 43 | assertThat(e.getMessage(), containsString("Unsupported parameter type supplied to [KeyboardSwitcher]")); 44 | 45 | } 46 | 47 | public void testSwitchLayout() { 48 | 49 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("russian"); 50 | 51 | char[] firstRowEnglish = "qwertyuiop[]QWERTYUIOP{}".toCharArray(); 52 | char[] firstRowRussian = "йцукенгшщзхъЙЦУКЕНГШЩЗХЪ".toCharArray(); 53 | 54 | assertTrue(Arrays.equals(firstRowEnglish, switcher.switchLayout(firstRowRussian, 0, firstRowRussian.length, false))); 55 | assertTrue(Arrays.equals(firstRowRussian, switcher.switchLayout(firstRowEnglish, 0, firstRowEnglish.length, false))); 56 | 57 | char[] secondRowEnglish = "asdfghjkl;'\\ASDFGHJKL:\"|".toCharArray(); 58 | 
char[] secondRowRussian = "фывапролджэёФЫВАПРОЛДЖЭЁ".toCharArray(); 59 | 60 | assertTrue(Arrays.equals(secondRowEnglish, switcher.switchLayout(secondRowRussian, 0, secondRowRussian.length, false))); 61 | assertTrue(Arrays.equals(secondRowRussian, switcher.switchLayout(secondRowEnglish, 0, secondRowEnglish.length, false))); 62 | 63 | char[] thirdRowEnglish = "zxcvbnm,.ZXCVBNM<>".toCharArray(); 64 | char[] thirdRowRussian = "ячсмитьбюЯЧСМИТЬБЮ".toCharArray(); 65 | 66 | assertTrue(Arrays.equals(thirdRowEnglish, switcher.switchLayout(thirdRowRussian, 0, thirdRowRussian.length, false))); 67 | assertTrue(Arrays.equals(thirdRowRussian, switcher.switchLayout(thirdRowEnglish, 0, thirdRowEnglish.length, false))); 68 | 69 | } 70 | 71 | public void testEncodeString() { 72 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("russian"); 73 | 74 | String firstRowEnglish = "qwertyuiop[]QWERTYUIOP{}"; 75 | String firstRowRussian = "йцукенгшщзхъЙЦУКЕНГШЩЗХЪ"; 76 | 77 | assertEquals(firstRowEnglish, switcher.encode(firstRowRussian)); 78 | assertEquals(firstRowRussian, switcher.encode(firstRowEnglish)); 79 | 80 | String secondRowEnglish = "asdfghjkl;'\\ASDFGHJKL:\"|"; 81 | String secondRowRussian = "фывапролджэёФЫВАПРОЛДЖЭЁ"; 82 | 83 | assertEquals(secondRowEnglish, switcher.encode(secondRowRussian)); 84 | assertEquals(secondRowRussian, switcher.encode(secondRowEnglish)); 85 | 86 | String thirdRowEnglish = "zxcvbnm,.ZXCVBNM<>"; 87 | String thirdRowRussian = "ячсмитьбюЯЧСМИТЬБЮ"; 88 | 89 | assertEquals(thirdRowEnglish, switcher.encode(thirdRowRussian)); 90 | assertEquals(thirdRowRussian, switcher.encode(thirdRowEnglish)); 91 | 92 | } 93 | 94 | } -------------------------------------------------------------------------------- /src/test/java/com/github/papahigh/keyboardswitcher/UkrainianKeyboardSwitcherTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache 
License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | import org.apache.commons.codec.EncoderException; 19 | import org.apache.lucene.util.LuceneTestCase; 20 | 21 | import java.util.Arrays; 22 | 23 | import static org.hamcrest.CoreMatchers.containsString; 24 | import static org.hamcrest.CoreMatchers.instanceOf; 25 | 26 | public class UkrainianKeyboardSwitcherTests extends LuceneTestCase { 27 | 28 | 29 | public void testEncodeObject() throws EncoderException { 30 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("ukrainian"); 31 | 32 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode(null)); 33 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((String) null)); 34 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((Integer) null)); 35 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((Object) null)); 36 | 37 | Throwable e = expectThrows(Throwable.class, () -> switcher.encode(123)); 38 | assertThat(e, instanceOf(EncoderException.class)); 39 | assertThat(e.getMessage(), containsString("Unsupported parameter type supplied to [KeyboardSwitcher]")); 40 | 41 | e = expectThrows(Throwable.class, () -> switcher.encode(switcher)); 42 | assertThat(e, instanceOf(EncoderException.class)); 43 | assertThat(e.getMessage(), containsString("Unsupported parameter type supplied to [KeyboardSwitcher]")); 44 | 45 | } 46 | 47 | 48 | 49 | public void testSwitchLayout() { 
50 | 51 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("ukrainian"); 52 | 53 | char[] firstRowEnglish = "qwertyuiop[]QWERTYUIOP{}".toCharArray(); 54 | char[] firstRowRussian = "йцукенгшщзхїЙЦУКЕНГШЩЗХЇ".toCharArray(); 55 | 56 | assertTrue(Arrays.equals(firstRowEnglish, switcher.switchLayout(firstRowRussian, 0, firstRowRussian.length, false))); 57 | assertTrue(Arrays.equals(firstRowRussian, switcher.switchLayout(firstRowEnglish, 0, firstRowEnglish.length, false))); 58 | 59 | char[] secondRowEnglish = "asdfghjkl;'ASDFGHJKL:\"".toCharArray(); 60 | char[] secondRowRussian = "фівапролджєФІВАПРОЛДЖЄ".toCharArray(); 61 | 62 | assertTrue(Arrays.equals(secondRowEnglish, switcher.switchLayout(secondRowRussian, 0, secondRowRussian.length, false))); 63 | assertTrue(Arrays.equals(secondRowRussian, switcher.switchLayout(secondRowEnglish, 0, secondRowEnglish.length, false))); 64 | 65 | char[] thirdRowEnglish = "\\zxcvbnm,.|ZXCVBNM<>".toCharArray(); 66 | char[] thirdRowRussian = "ґячсмитьбюҐЯЧСМИТЬБЮ".toCharArray(); 67 | 68 | assertTrue(Arrays.equals(thirdRowEnglish, switcher.switchLayout(thirdRowRussian, 0, thirdRowRussian.length, false))); 69 | assertTrue(Arrays.equals(thirdRowRussian, switcher.switchLayout(thirdRowEnglish, 0, thirdRowEnglish.length, false))); 70 | 71 | } 72 | 73 | public void testEncodeString() { 74 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("ukrainian"); 75 | 76 | String firstRowEnglish = "qwertyuiop[]QWERTYUIOP{}"; 77 | String firstRowRussian = "йцукенгшщзхїЙЦУКЕНГШЩЗХЇ"; 78 | 79 | assertEquals(firstRowEnglish, switcher.encode(firstRowRussian)); 80 | assertEquals(firstRowRussian, switcher.encode(firstRowEnglish)); 81 | 82 | String secondRowEnglish = "asdfghjkl;'ASDFGHJKL:\""; 83 | String secondRowRussian = "фівапролджєФІВАПРОЛДЖЄ"; 84 | 85 | assertEquals(secondRowEnglish, switcher.encode(secondRowRussian)); 86 | assertEquals(secondRowRussian, switcher.encode(secondRowEnglish)); 87 | 88 | String thirdRowEnglish = 
"\\zxcvbnm,.|ZXCVBNM<>"; 89 | String thirdRowRussian = "ґячсмитьбюҐЯЧСМИТЬБЮ"; 90 | 91 | assertEquals(thirdRowEnglish, switcher.encode(thirdRowRussian)); 92 | assertEquals(thirdRowRussian, switcher.encode(thirdRowEnglish)); 93 | 94 | } 95 | } -------------------------------------------------------------------------------- /src/test/java/com/github/papahigh/keyboardswitcher/BelarusianKeyboardSwitcherTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.papahigh.keyboardswitcher; 17 | 18 | import org.apache.commons.codec.EncoderException; 19 | import org.apache.lucene.util.LuceneTestCase; 20 | 21 | import java.util.Arrays; 22 | 23 | import static org.hamcrest.CoreMatchers.containsString; 24 | import static org.hamcrest.CoreMatchers.instanceOf; 25 | 26 | public class BelarusianKeyboardSwitcherTests extends LuceneTestCase { 27 | 28 | 29 | public void testEncodeObject() throws EncoderException { 30 | 31 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("belarusian"); 32 | 33 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode(null)); 34 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((String) null)); 35 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((Integer) null)); 36 | assertEquals(KeyboardSwitcher.EMPTY_STRING, switcher.encode((Object) null)); 37 | 38 | Throwable e = expectThrows(Throwable.class, () -> switcher.encode(123)); 39 | assertThat(e, instanceOf(EncoderException.class)); 40 | assertThat(e.getMessage(), containsString("Unsupported parameter type supplied to [KeyboardSwitcher]")); 41 | 42 | e = expectThrows(Throwable.class, () -> switcher.encode(switcher)); 43 | assertThat(e, instanceOf(EncoderException.class)); 44 | assertThat(e.getMessage(), containsString("Unsupported parameter type supplied to [KeyboardSwitcher]")); 45 | 46 | } 47 | 48 | 49 | 50 | public void testSwitchLayout() { 51 | 52 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("belarusian"); 53 | 54 | char[] firstRowEnglish = "qwertyuiop[QWERTYUIOP{".toCharArray(); 55 | char[] firstRowRussian = "йцукенгшўзхЙЦУКЕНГШЎЗХ".toCharArray(); 56 | 57 | // А Б В Г Д Е Ё Ж З І Й К Л М Н О П Р С Т У Ў Ф Х Ц Ч Ш Ы Ь Э Ю Я 58 | // а б в г д е ё ж з і й к л м н о п р с т у ў ф х ц ч ш ы ь э ю я 59 | 60 | assertTrue(Arrays.equals(firstRowEnglish, switcher.switchLayout(firstRowRussian, 0, firstRowRussian.length, false))); 61 | 
assertTrue(Arrays.equals(firstRowRussian, switcher.switchLayout(firstRowEnglish, 0, firstRowEnglish.length, false))); 62 | 63 | char[] secondRowEnglish = "asdfghjkl;'ASDFGHJKL:\"".toCharArray(); 64 | char[] secondRowRussian = "фывапролджэФЫВАПРОЛДЖЭ".toCharArray(); 65 | 66 | assertTrue(Arrays.equals(secondRowEnglish, switcher.switchLayout(secondRowRussian, 0, secondRowRussian.length, false))); 67 | assertTrue(Arrays.equals(secondRowRussian, switcher.switchLayout(secondRowEnglish, 0, secondRowEnglish.length, false))); 68 | 69 | char[] thirdRowEnglish = "zxcvbnm,.ZXCVBNM<>".toCharArray(); 70 | char[] thirdRowRussian = "ячсмітьбюЯЧСМІТЬБЮ".toCharArray(); 71 | 72 | assertTrue(Arrays.equals(thirdRowEnglish, switcher.switchLayout(thirdRowRussian, 0, thirdRowRussian.length, false))); 73 | assertTrue(Arrays.equals(thirdRowRussian, switcher.switchLayout(thirdRowEnglish, 0, thirdRowEnglish.length, false))); 74 | 75 | } 76 | 77 | public void testEncodeString() { 78 | KeyboardSwitcher switcher = KeyboardSwitcherProvider.provide("belarusian"); 79 | 80 | String firstRowEnglish = "qwertyuiop[QWERTYUIOP{"; 81 | String firstRowRussian = "йцукенгшўзхЙЦУКЕНГШЎЗХ"; 82 | 83 | assertEquals(firstRowEnglish, switcher.encode(firstRowRussian)); 84 | assertEquals(firstRowRussian, switcher.encode(firstRowEnglish)); 85 | 86 | String secondRowEnglish = "asdfghjkl;'ASDFGHJKL:\""; 87 | String secondRowRussian = "фывапролджэФЫВАПРОЛДЖЭ"; 88 | 89 | assertEquals(secondRowEnglish, switcher.encode(secondRowRussian)); 90 | assertEquals(secondRowRussian, switcher.encode(secondRowEnglish)); 91 | 92 | String thirdRowEnglish = "zxcvbnm,.ZXCVBNM<>"; 93 | String thirdRowRussian = "ячсмітьбюЯЧСМІТЬБЮ"; 94 | 95 | assertEquals(thirdRowEnglish, switcher.encode(thirdRowRussian)); 96 | assertEquals(thirdRowRussian, switcher.encode(thirdRowEnglish)); 97 | 98 | } 99 | } -------------------------------------------------------------------------------- 
/src/test/resources/rest-api-spec/test/keyboard_layout/30_switch_suggester_option_analyzer.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [custom suggest analyzer]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 | index: suggester_custom_analyzer 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: standard 23 | filter: ["lowercase"] 24 | my_suggest_analyzer: 25 | tokenizer: keyboard_tokenizer 26 | filter: ["lowercase"] 27 | mappings: 28 | properties: 29 | content: 30 | type: text 31 | analyzer: "my_analyzer" 32 | 33 | - do: 34 | bulk: 35 | index: suggester_custom_analyzer 36 | refresh: true 37 | body: 38 | - '{"index": {"_index": "suggester_custom_analyzer", "_id": "1"}}' 39 | - '{ "content": "The quick brown fox jumps over the lazy dog." }' 40 | - '{"index": {"_index": "suggester_custom_analyzer", "_id": "2"}}' 41 | - '{ "content": "Съешь же еще этих мягких французских булок, да выпей чаю." }' 42 | - '{"index": {"_index": "suggester_custom_analyzer", "_id": "3"}}' 43 | - '{ "content": "Реклама транслирует потребительский креатив этих мягких французских булок." }' 44 | - '{"index": {"_index": "suggester_custom_analyzer", "_id": "4"}}' 45 | - '{ "content": "Нишевый проект, следовательно, раскручивает социометрический BTL, полагаясь на инсайдерскую информацию." 
}' 46 | 47 | - do: 48 | indices.refresh: 49 | index: "_all" 50 | 51 | - do: 52 | search: 53 | size: 0 54 | index: suggester_custom_analyzer 55 | body: 56 | suggest: 57 | text: 'ИЕД ,EKjr AhfywepcRB[ Brown fox Реклама мягких' 58 | keyboard_layout_default: 59 | keyboard_layout: 60 | field: content 61 | language: russian 62 | analyzer: my_suggest_analyzer 63 | 64 | - length: { suggest.keyboard_layout_default: 7 } 65 | # suggest [ иед ] -> [ btl ] 66 | - match: { suggest.keyboard_layout_default.0.text: 'иед' } 67 | - match: { suggest.keyboard_layout_default.0.offset: 0 } 68 | - match: { suggest.keyboard_layout_default.0.length: 3 } 69 | - length: { suggest.keyboard_layout_default.0.options: 1 } 70 | - match: { suggest.keyboard_layout_default.0.options.0.text: 'btl' } 71 | - match: { suggest.keyboard_layout_default.0.options.0.freq: 1 } 72 | - match: { suggest.keyboard_layout_default.0.options.0.switch: true } 73 | # suggest [ ,ekjr ] -> [ булок ] 74 | - match: { suggest.keyboard_layout_default.1.text: ',ekjr' } 75 | - match: { suggest.keyboard_layout_default.1.offset: 4 } 76 | - match: { suggest.keyboard_layout_default.1.length: 5 } 77 | - length: { suggest.keyboard_layout_default.1.options: 1 } 78 | - match: { suggest.keyboard_layout_default.1.options.0.text: 'булок' } 79 | - gte: { suggest.keyboard_layout_default.1.options.0.freq: 1 } 80 | - lte: { suggest.keyboard_layout_default.1.options.0.freq: 2 } 81 | - match: { suggest.keyboard_layout_default.1.options.0.switch: true } 82 | # suggest [ ahfywepcrb[ ] -> [ французских ] 83 | - match: { suggest.keyboard_layout_default.2.text: 'ahfywepcrb[' } 84 | - match: { suggest.keyboard_layout_default.2.offset: 10 } 85 | - match: { suggest.keyboard_layout_default.2.length: 11 } 86 | - length: { suggest.keyboard_layout_default.2.options: 1 } 87 | - match: { suggest.keyboard_layout_default.2.options.0.text: 'французских' } 88 | - gte: { suggest.keyboard_layout_default.2.options.0.freq: 1 } 89 | - lte: { 
suggest.keyboard_layout_default.2.options.0.freq: 2 } 90 | - match: { suggest.keyboard_layout_default.2.options.0.switch: true } 91 | # suggest [ nothing ] 92 | - match: { suggest.keyboard_layout_default.3.text: 'brown' } 93 | - match: { suggest.keyboard_layout_default.3.offset: 22 } 94 | - match: { suggest.keyboard_layout_default.3.length: 5 } 95 | - length: { suggest.keyboard_layout_default.3.options: 0 } 96 | # suggest [ nothing ] 97 | - match: { suggest.keyboard_layout_default.4.text: 'fox' } 98 | - match: { suggest.keyboard_layout_default.4.offset: 28 } 99 | - match: { suggest.keyboard_layout_default.4.length: 3 } 100 | - length: { suggest.keyboard_layout_default.4.options: 0 } 101 | # suggest [ nothing ] 102 | - match: { suggest.keyboard_layout_default.5.text: 'реклама' } 103 | - match: { suggest.keyboard_layout_default.5.offset: 32 } 104 | - match: { suggest.keyboard_layout_default.5.length: 7 } 105 | - length: { suggest.keyboard_layout_default.5.options: 0 } 106 | # suggest [ nothing ] 107 | - match: { suggest.keyboard_layout_default.6.text: 'мягких' } 108 | - match: { suggest.keyboard_layout_default.6.offset: 40 } 109 | - match: { suggest.keyboard_layout_default.6.length: 6 } 110 | - length: { suggest.keyboard_layout_default.6.options: 0 } 111 | -------------------------------------------------------------------------------- /src/test/resources/rest-api-spec/test/keyboard_layout/60_switch_suggester_option_add_original.yml: -------------------------------------------------------------------------------- 1 | "Keyboard Layout Suggester [add_original]": 2 | - do: 3 | cluster.health: 4 | wait_for_nodes: 2 5 | 6 | - is_true: cluster_name 7 | - is_false: timed_out 8 | - gte: { number_of_nodes: 2 } 9 | - gte: { number_of_data_nodes: 2 } 10 | 11 | - do: 12 | indices.create: 13 | index: suggester_add_original 14 | body: 15 | settings: 16 | number_of_shards: 2 17 | number_of_replicas: 0 18 | index: 19 | analysis: 20 | analyzer: 21 | my_analyzer: 22 | tokenizer: 
standard 23 | 24 | mappings: 25 | properties: 26 | content: 27 | type: text 28 | analyzer: "my_analyzer" 29 | 30 | - do: 31 | bulk: 32 | index: suggester_add_original 33 | refresh: true 34 | body: 35 | - '{"index": {"_index": "suggester_add_original", "_id": "1"}}' 36 | - '{ "content": "To be or not to be" }' 37 | - '{"index": {"_index": "suggester_add_original", "_id": "2"}}' 38 | - '{ "content": "мороз и солнце и еще кое-что" }' 39 | - '{"index": {"_index": "suggester_add_original", "_id": "3"}}' 40 | - '{ "content": "Кроссовки для мальчиков и девочек модель B/B-b-b-b" }' 41 | - '{"index": {"_index": "suggester_add_original", "_id": "4"}}' 42 | - '{ "content": "и куда же без B-52" }' 43 | 44 | - do: 45 | indices.refresh: 46 | index: "_all" 47 | 48 | - do: 49 | search: 50 | size: 0 51 | index: suggester_add_original 52 | body: 53 | suggest: 54 | text: 'B Relf ;t ,tp' 55 | keyboard_layout_default: 56 | keyboard_layout: 57 | field: content 58 | language: russian 59 | lowercase_token: true 60 | preserve_case: true 61 | add_original: true 62 | 63 | - length: { suggest.keyboard_layout_default: 4 } 64 | # suggest [ B ] -> [ И, B ] 65 | - match: { suggest.keyboard_layout_default.0.text: 'B' } 66 | - match: { suggest.keyboard_layout_default.0.offset: 0 } 67 | - match: { suggest.keyboard_layout_default.0.length: 1 } 68 | - length: { suggest.keyboard_layout_default.0.options: 2 } 69 | - match: { suggest.keyboard_layout_default.0.options.0.text: 'И' } 70 | - gte: { suggest.keyboard_layout_default.0.options.0.freq: 1 } 71 | - lte: { suggest.keyboard_layout_default.0.options.0.freq: 4 } 72 | - match: { suggest.keyboard_layout_default.0.options.0.switch: true } 73 | - match: { suggest.keyboard_layout_default.0.options.1.text: 'B' } 74 | - gte: { suggest.keyboard_layout_default.0.options.1.freq: 1 } 75 | - lte: { suggest.keyboard_layout_default.0.options.1.freq: 4 } 76 | - match: { suggest.keyboard_layout_default.0.options.1.switch: false } 77 | 78 | # suggest [ Relf ] -> [ 
Куда, Relf ] 79 | - match: { suggest.keyboard_layout_default.1.text: 'Relf' } 80 | - match: { suggest.keyboard_layout_default.1.offset: 2 } 81 | - match: { suggest.keyboard_layout_default.1.length: 4 } 82 | - length: { suggest.keyboard_layout_default.1.options: 2 } 83 | - match: { suggest.keyboard_layout_default.1.options.0.text: 'Куда' } 84 | - gte: { suggest.keyboard_layout_default.1.options.0.freq: 1 } 85 | - lte: { suggest.keyboard_layout_default.1.options.0.freq: 4 } 86 | - match: { suggest.keyboard_layout_default.1.options.0.switch: true } 87 | - match: { suggest.keyboard_layout_default.1.options.1.text: 'Relf' } 88 | - match: { suggest.keyboard_layout_default.1.options.1.freq: 0 } 89 | - match: { suggest.keyboard_layout_default.1.options.1.switch: false } 90 | 91 | # suggest [ ;t ] -> [ же, ;t ] 92 | - match: { suggest.keyboard_layout_default.2.text: ';t' } 93 | - match: { suggest.keyboard_layout_default.2.offset: 7 } 94 | - match: { suggest.keyboard_layout_default.2.length: 2 } 95 | - length: { suggest.keyboard_layout_default.2.options: 2 } 96 | - match: { suggest.keyboard_layout_default.2.options.0.text: 'же' } 97 | - gte: { suggest.keyboard_layout_default.2.options.0.freq: 1 } 98 | - lt: { suggest.keyboard_layout_default.2.options.0.freq: 2 } 99 | - match: { suggest.keyboard_layout_default.2.options.0.switch: true } 100 | - match: { suggest.keyboard_layout_default.2.options.1.text: ';t' } 101 | - match: { suggest.keyboard_layout_default.2.options.1.freq: 0 } 102 | - match: { suggest.keyboard_layout_default.2.options.1.switch: false } 103 | 104 | # suggest [ ,tp ] -> [ без, ,tp ] 105 | - match: { suggest.keyboard_layout_default.3.text: ',tp' } 106 | - match: { suggest.keyboard_layout_default.3.offset: 10 } 107 | - match: { suggest.keyboard_layout_default.3.length: 3 } 108 | - length: { suggest.keyboard_layout_default.3.options: 2 } 109 | - match: { suggest.keyboard_layout_default.3.options.0.text: 'без' } 110 | - gte: { 
suggest.keyboard_layout_default.3.options.0.freq: 1 } 111 | - lt: { suggest.keyboard_layout_default.3.options.0.freq: 2 } 112 | - match: { suggest.keyboard_layout_default.3.options.0.switch: true } 113 | - match: { suggest.keyboard_layout_default.3.options.1.text: ',tp' } 114 | - match: { suggest.keyboard_layout_default.3.options.1.freq: 0 } 115 | - match: { suggest.keyboard_layout_default.3.options.1.switch: false } 116 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # 4 | # Copyright 2015 the original author or authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | ## 21 | ## Gradle start up script for UN*X 22 | ## 23 | ############################################################################## 24 | 25 | # Attempt to set APP_HOME 26 | # Resolve links: $0 may be a link 27 | PRG="$0" 28 | # Need this for relative symlinks. 
29 | while [ -h "$PRG" ] ; do 30 | ls=`ls -ld "$PRG"` 31 | link=`expr "$ls" : '.*-> \(.*\)$'` 32 | if expr "$link" : '/.*' > /dev/null; then 33 | PRG="$link" 34 | else 35 | PRG=`dirname "$PRG"`"/$link" 36 | fi 37 | done 38 | SAVED="`pwd`" 39 | cd "`dirname \"$PRG\"`/" >/dev/null 40 | APP_HOME="`pwd -P`" 41 | cd "$SAVED" >/dev/null 42 | 43 | APP_NAME="Gradle" 44 | APP_BASE_NAME=`basename "$0"` 45 | 46 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 47 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 48 | 49 | # Use the maximum available, or set MAX_FD != -1 to use that value. 50 | MAX_FD="maximum" 51 | 52 | warn () { 53 | echo "$*" 54 | } 55 | 56 | die () { 57 | echo 58 | echo "$*" 59 | echo 60 | exit 1 61 | } 62 | 63 | # OS specific support (must be 'true' or 'false'). 64 | cygwin=false 65 | msys=false 66 | darwin=false 67 | nonstop=false 68 | case "`uname`" in 69 | CYGWIN* ) 70 | cygwin=true 71 | ;; 72 | Darwin* ) 73 | darwin=true 74 | ;; 75 | MINGW* ) 76 | msys=true 77 | ;; 78 | NONSTOP* ) 79 | nonstop=true 80 | ;; 81 | esac 82 | 83 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 84 | 85 | # Determine the Java command to use to start the JVM. 86 | if [ -n "$JAVA_HOME" ] ; then 87 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 88 | # IBM's JDK on AIX uses strange locations for the executables 89 | JAVACMD="$JAVA_HOME/jre/sh/java" 90 | else 91 | JAVACMD="$JAVA_HOME/bin/java" 92 | fi 93 | if [ ! -x "$JAVACMD" ] ; then 94 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 95 | 96 | Please set the JAVA_HOME variable in your environment to match the 97 | location of your Java installation." 98 | fi 99 | else 100 | JAVACMD="java" 101 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 102 | 103 | Please set the JAVA_HOME variable in your environment to match the 104 | location of your Java installation." 
105 | fi 106 | 107 | # Increase the maximum file descriptors if we can. 108 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 109 | MAX_FD_LIMIT=`ulimit -H -n` 110 | if [ $? -eq 0 ] ; then 111 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 112 | MAX_FD="$MAX_FD_LIMIT" 113 | fi 114 | ulimit -n $MAX_FD 115 | if [ $? -ne 0 ] ; then 116 | warn "Could not set maximum file descriptor limit: $MAX_FD" 117 | fi 118 | else 119 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 120 | fi 121 | fi 122 | 123 | # For Darwin, add options to specify how the application appears in the dock 124 | if $darwin; then 125 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 126 | fi 127 | 128 | # For Cygwin or MSYS, switch paths to Windows format before running java 129 | if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then 130 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 131 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 132 | JAVACMD=`cygpath --unix "$JAVACMD"` 133 | 134 | # We build the pattern for arguments to be converted via cygpath 135 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 136 | SEP="" 137 | for dir in $ROOTDIRSRAW ; do 138 | ROOTDIRS="$ROOTDIRS$SEP$dir" 139 | SEP="|" 140 | done 141 | OURCYGPATTERN="(^($ROOTDIRS))" 142 | # Add a user-defined pattern to the cygpath arguments 143 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 144 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 145 | fi 146 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 147 | i=0 148 | for arg in "$@" ; do 149 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 150 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 151 | 152 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 153 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 154 | else 155 | eval `echo args$i`="\"$arg\"" 156 | fi 157 | i=`expr $i + 1` 
158 | done 159 | case $i in 160 | 0) set -- ;; 161 | 1) set -- "$args0" ;; 162 | 2) set -- "$args0" "$args1" ;; 163 | 3) set -- "$args0" "$args1" "$args2" ;; 164 | 4) set -- "$args0" "$args1" "$args2" "$args3" ;; 165 | 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 166 | 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 167 | 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 168 | 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 169 | 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 170 | esac 171 | fi 172 | 173 | # Escape application args 174 | save () { 175 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 176 | echo " " 177 | } 178 | APP_ARGS=`save "$@"` 179 | 180 | # Collect all arguments for the java command, following the shell quoting and substitution rules 181 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 182 | 183 | exec "$JAVACMD" "$@" 184 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/search/suggest/keyboard/KeyboardLayoutSuggestion.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.elasticsearch.search.suggest.keyboard; 17 | 18 | import org.elasticsearch.common.ParseField; 19 | import org.elasticsearch.common.io.stream.StreamInput; 20 | import org.elasticsearch.common.io.stream.StreamOutput; 21 | import org.elasticsearch.common.text.Text; 22 | import org.elasticsearch.common.xcontent.ConstructingObjectParser; 23 | import org.elasticsearch.common.xcontent.ObjectParser; 24 | import org.elasticsearch.common.xcontent.XContentBuilder; 25 | import org.elasticsearch.common.xcontent.XContentParser; 26 | import org.elasticsearch.search.suggest.Suggest; 27 | 28 | import java.io.IOException; 29 | import java.util.Comparator; 30 | import java.util.Objects; 31 | 32 | import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; 33 | 34 | 35 | public final class KeyboardLayoutSuggestion extends Suggest.Suggestion { 36 | 37 | KeyboardLayoutSuggestion(String name, int size) { 38 | super(name, size); 39 | } 40 | 41 | public KeyboardLayoutSuggestion(StreamInput in) throws IOException { 42 | super(in); 43 | } 44 | 45 | @Override 46 | protected Comparator sortComparator() { 47 | return FREQUENCY; 48 | } 49 | 50 | @Override 51 | public String getWriteableName() { 52 | return KeyboardLayoutSuggestionBuilder.SUGGESTION_NAME; 53 | } 54 | 55 | @Override 56 | @SuppressWarnings("deprecation") 57 | public int getWriteableType() { 58 | return TYPE; 59 | } 60 | 61 | @Override 62 | protected Entry newEntry(StreamInput in) throws IOException { 63 | return new Entry(in); 64 | } 65 | 66 | public static class Entry extends Suggest.Suggestion.Entry { 67 | 68 | Entry() { 69 | } 70 | 71 | Entry(StreamInput in) throws IOException { 72 | super(in); 73 | } 74 | 75 | @Override 76 | protected Option newOption(StreamInput in) throws IOException { 77 | return new Option(in); 78 | } 79 | 80 | Entry(Text text, int offset, int 
length) { 81 | super(text, offset, length); 82 | } 83 | 84 | static Entry fromXContent(XContentParser parser) { 85 | return ENTRY_PARSER.apply(parser, null); 86 | } 87 | 88 | public static class Option extends Suggest.Suggestion.Entry.Option { 89 | private static Text EMPTY = new Text("_na_"); 90 | 91 | static final ParseField TEXT_FIELD = new ParseField("text"); 92 | static final ParseField FREQ_FIELD = new ParseField("freq"); 93 | static final ParseField SWITCH_FIELD = new ParseField("switch"); 94 | 95 | private final Text text; 96 | 97 | private int freq; 98 | private boolean switched; 99 | 100 | Option(Text text, int freq, boolean switched) { 101 | super(EMPTY, 0); 102 | this.text = text; 103 | this.freq = freq; 104 | this.switched = switched; 105 | } 106 | 107 | Option(StreamInput in) throws IOException { 108 | super(EMPTY, 0); 109 | text = in.readText(); 110 | freq = in.readVInt(); 111 | switched = in.readBoolean(); 112 | } 113 | 114 | @Override 115 | public void writeTo(StreamOutput out) throws IOException { 116 | out.writeText(text); 117 | out.writeVInt(freq); 118 | out.writeBoolean(switched); 119 | } 120 | 121 | @Override 122 | protected void mergeInto(Suggest.Suggestion.Entry.Option otherOption) { 123 | freq += ((KeyboardLayoutSuggestion.Entry.Option) otherOption).freq; 124 | } 125 | 126 | @Override 127 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 128 | builder.field(TEXT.getPreferredName(), text); 129 | builder.field(FREQ_FIELD.getPreferredName(), freq); 130 | builder.field(SWITCH_FIELD.getPreferredName(), switched); 131 | return builder; 132 | } 133 | 134 | @Override 135 | public boolean equals(Object o) { 136 | if (this == o) { 137 | return true; 138 | } 139 | if (o == null || getClass() != o.getClass()) { 140 | return false; 141 | } 142 | 143 | Option that = (Option) o; 144 | return Objects.equals(text, that.text); 145 | } 146 | 147 | @Override 148 | public int hashCode() { 149 | return 
Objects.hash(text); 150 | } 151 | 152 | static Option fromXContent(XContentParser parser) { 153 | return OPTIONS_PARSER.apply(parser, null); 154 | } 155 | 156 | private static final ConstructingObjectParser OPTIONS_PARSER = new ConstructingObjectParser<>( 157 | "RussianKeyboardSuggestionOptionParser", 158 | true, 159 | args -> { 160 | Text text = new Text((String) args[0]); 161 | int freq = (Integer) args[1]; 162 | boolean switched = (Boolean) args[2]; 163 | return new Option(text, freq, switched); 164 | }); 165 | 166 | static { 167 | OPTIONS_PARSER.declareString(constructorArg(), TEXT_FIELD); 168 | OPTIONS_PARSER.declareInt(constructorArg(), FREQ_FIELD); 169 | OPTIONS_PARSER.declareBoolean(constructorArg(), SWITCH_FIELD); 170 | } 171 | } 172 | 173 | private static ObjectParser ENTRY_PARSER = new ObjectParser<>("KeyboardLayoutSuggestionEntryParser", true, Entry::new); 174 | 175 | static { 176 | declareCommonFields(ENTRY_PARSER); 177 | ENTRY_PARSER.declareObjectArray(Entry::addOptions, (p, c) -> Option.fromXContent(p), new ParseField(OPTIONS)); 178 | } 179 | } 180 | 181 | public static class Frequency implements Comparator { 182 | @Override 183 | public int compare(Suggest.Suggestion.Entry.Option first, Suggest.Suggestion.Entry.Option second) { 184 | int freqCmp = ((Entry.Option) second).freq - ((Entry.Option) first).freq; 185 | return freqCmp != 0 ? 
freqCmp : first.getText().compareTo(second.getText()); 186 | } 187 | } 188 | 189 | private static final int TYPE = 152; 190 | private static final Comparator FREQUENCY = new Frequency(); 191 | 192 | } 193 | -------------------------------------------------------------------------------- /README.asciidoc: -------------------------------------------------------------------------------- 1 | = Elasticsearch plugin for keyboard layout suggestions 2 | Nikolay Papakha 3 | ifdef::env-github[] 4 | :tip-caption: :bulb: 5 | :note-caption: :paperclip: 6 | :important-caption: :heavy_exclamation_mark: 7 | :caution-caption: :fire: 8 | :warning-caption: :warning: 9 | endif::[] 10 | ifndef::env-github[] 11 | endif::[] 12 | 13 | :url-releases-page: https://github.com/papahigh/elasticsearch-keyboard-layout/blob/master/releases.asciidoc 14 | :url-issue-tracker: https://github.com/papahigh/elasticsearch-keyboard-layout/issues 15 | :url-pull-request: https://github.com/papahigh/elasticsearch-keyboard-layout/pulls 16 | :url-phonetic-plugin: https://github.com/papahigh/elasticsearch-russian-phonetics 17 | 18 | :url-es-term-suggester: https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-phonetic.html 19 | :url-es-phonetic-analysis: https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-phonetic.html 20 | 21 | image:https://travis-ci.org/papahigh/elasticsearch-keyboard-layout.svg?branch=master["Build Status", link="https://travis-ci.org/papahigh/elasticsearch-keyboard-layout"] 22 | image:https://img.shields.io/badge/License-Apache%202.0-blue.svg[link=https://opensource.org/licenses/Apache-2.0] 23 | 24 | This plugin exposes `*keyboard_layout*` term suggester which suggests terms according to the switched keyboard layout. 
25 | 26 | 27 | [source,indent=0] 28 | ._Examples of suggestions this plugin helps to provide_ 29 | ---- 30 | шзрщту ч 64пи ⟶ iphone x 64gb 31 | nt[yjkjus] ⟶ технології 32 | dszdbo ytrfkmrb dsgflrfo ⟶ выявіў некалькі выпадкаў 33 | тшлу rhjccjdrb runner 2 ⟶ nike кроссовки runner 2 34 | ;tcnrbq lbcr 1n, ⟶ жесткий диск 1тб 35 | ---- 36 | 37 | The following keyboard layouts are supported: 38 | 39 | * Russian 40 | * Ukrainian 41 | * Belarusian 42 | 43 | Feel free to open a pull request with any other keyboard layouts. 44 | 45 | This plugin may be used in combination with the {url-es-term-suggester}[default term suggester], which is based on string similarity, in order to build a Google-like search experience known as "did you mean?". 46 | 47 | .link:https://imgur.com/iQ7rp7Ar[Did_you_mean_feature.png] 48 | image::https://i.imgur.com/iQ7rp7Ar.png[223,600] 49 | 50 | 51 | == Installation 52 | 53 | WARNING: Please note that due to the https://github.com/elastic/elasticsearch/pull/30284[serialization issue] this plugin is available only for Elasticsearch 7.0.0 and above. 54 | 55 | In order to install the plugin, {url-releases-page}[choose a version] and run: 56 | 57 | [source,sh] 58 | ---- 59 | $ bin/elasticsearch-plugin install URL 60 | ---- 61 | 62 | where `*URL*` points to the zip file of the release that corresponds to your Elasticsearch version. 63 | 64 | IMPORTANT: The plugin must be installed on every node in the cluster, and each node must be restarted after installation. 65 | 66 | E.g., the command for Elasticsearch 7.6.0: 67 | 68 | [source,sh,options="wrap"] 69 | ---- 70 | # install plugin on Elasticsearch 7.6.0 71 | $ bin/elasticsearch-plugin install https://github.com/papahigh/elasticsearch-keyboard-layout/raw/7.6.0/dist/keyboard-layout-7.6.0.zip 72 | ---- 73 | 74 | After installation this plugin exposes a new token filter and a term suggester, both named `*keyboard_layout*`. 
75 | 76 | == Getting started with Suggester 77 | You can start using the `*keyboard_layout*` suggester by providing the suggest part of a search request: 78 | 79 | [source,javascript] 80 | -------------------------------------------------- 81 | POST _search 82 | { 83 | "suggest": { 84 | "text": "шЗрщту ЧЫ 64пи", 85 | "keyboard_suggestion": { 86 | "keyboard_layout": { 87 | "field": "content", 88 | "language": "russian", 89 | "lowercase_token": true, 90 | "preserve_case": true, 91 | "add_original": false 92 | } 93 | } 94 | } 95 | } 96 | -------------------------------------------------- 97 | 98 | In the response you should see, for each token, its original start offset and length in the suggest text and, if any were found, the switched keyboard layout options. 99 | Each options array contains option objects that include the suggested text and its document frequency. You may also request the original token and its frequency by providing the `*add_original*` option. 100 | 101 | [source,js] 102 | -------------------------------------------------- 103 | { 104 | "suggest": { 105 | "keyboard_suggestion": [ 106 | { 107 | "text": "шЗрщту", 108 | "offset": 0, 109 | "length": 6, 110 | "options": [ 111 | { 112 | "text": "iPhone", 113 | "freq": 4, 114 | "switch": true 115 | } 116 | ] 117 | }, 118 | { 119 | "text": "ЧЫ", 120 | "offset": 7, 121 | "length": 2, 122 | "options": [ 123 | { 124 | "text": "XS", 125 | "freq": 2, 126 | "switch": true 127 | } 128 | ] 129 | }, 130 | { 131 | "text": "64пи", 132 | "offset": 10, 133 | "length": 4, 134 | "options": [ 135 | { 136 | "text": "64gb", 137 | "freq": 1, 138 | "switch": true 139 | } 140 | ] 141 | } 142 | ] 143 | } 144 | ... 145 | } 146 | -------------------------------------------------- 147 | 148 | Extension for go client github.com/olivere/elastic: https://github.com/aaerofeev/go-elasic-keyboard-layout 149 | 150 | === Suggester options 151 | The supported suggester options are as follows: 152 | 153 | [horizontal] 154 | *text*:: 155 | The suggest text. 
The suggest text is a required option that needs to be set globally or per suggestion. 156 | 157 | *field*:: 158 | The field to fetch the candidate suggestions from. This is a required option that either needs to be set globally or per suggestion. 159 | 160 | *language*:: 161 | The language of the keyboard layout. This is a required option. Available options are: `*russian*`, `*belarusian*`, `*ukrainian*`. 162 | 163 | *analyzer*:: 164 | The analyzer to analyse the suggest text with. Defaults to the https://lucene.apache.org/core/8_0_0/analyzers-common/org/apache/lucene/analysis/core/WhitespaceAnalyzer.html[whitespace analyzer]. 165 | 166 | *lowercase_token*:: 167 | Lower cases terms before frequency evaluation and after the suggest analysis is done. Defaults to *false*. 168 | 169 | *preserve_case*:: 170 | Whether case should be preserved in the switched suggest options. When *lowercase_token* is set to *true* this option restores the original case. Defaults to *false*. 171 | 172 | *min_freq*:: 173 | The minimal threshold in number of documents a suggestion should appear in. This can be specified as an absolute number or as a relative percentage of number of documents. This can improve quality by only suggesting high frequency terms. Defaults to 0f and is not enabled. If a value higher than 1 is specified then the number cannot be fractional. The shard level document frequencies are used for this option. 174 | 175 | *max_freq*:: 176 | The maximum threshold in number of documents a suggest text token can exist in order to be included. Can be a relative percentage number (e.g. 0.4) or an absolute number to represent document frequencies. If a value higher than 1 is specified then the number cannot be fractional. Defaults to -1 and is not enabled. This can be used to exclude high frequency terms from switch keyboard suggestions. The shard level document frequencies are used for this option. 
177 | 178 | *add_original*:: 179 | Whether original term and its frequency should be included in the suggest options. Default is *false*. 180 | 181 | == Contribute 182 | Use the {url-issue-tracker}[issue tracker] and/or open {url-pull-request}[pull requests]. 183 | 184 | == Licence 185 | This project is released under version 2.0 of the http://www.apache.org/licenses/LICENSE-2.0[Apache Licence]. 186 | 187 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/search/suggest/keyboard/KeyboardLayoutSuggester.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.elasticsearch.search.suggest.keyboard; 17 | 18 | import com.github.papahigh.keyboardswitcher.KeyboardSwitcher; 19 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; 20 | import org.apache.lucene.index.IndexReader; 21 | import org.apache.lucene.index.Term; 22 | import org.apache.lucene.search.IndexSearcher; 23 | import org.apache.lucene.util.BytesRef; 24 | import org.apache.lucene.util.BytesRefBuilder; 25 | import org.apache.lucene.util.CharsRefBuilder; 26 | import org.elasticsearch.common.bytes.BytesArray; 27 | import org.elasticsearch.common.text.Text; 28 | import org.elasticsearch.search.suggest.Suggest; 29 | import org.elasticsearch.search.suggest.Suggester; 30 | import org.elasticsearch.search.suggest.SuggestionSearchContext; 31 | import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator; 32 | 33 | import java.io.IOException; 34 | import java.util.ArrayList; 35 | import java.util.Arrays; 36 | import java.util.List; 37 | import java.util.Locale; 38 | 39 | 40 | public class KeyboardLayoutSuggester extends Suggester { 41 | 42 | static KeyboardLayoutSuggester INSTANCE = new KeyboardLayoutSuggester(); 43 | 44 | private KeyboardLayoutSuggester() { 45 | } 46 | 47 | @Override 48 | protected KeyboardLayoutSuggestion innerExecute(String name, KeyboardLayoutSuggestionContext suggestion, 49 | IndexSearcher searcher, CharsRefBuilder spare) throws IOException { 50 | KeyboardLayoutSuggestion response = new KeyboardLayoutSuggestion(name, suggestion.getSize()); 51 | SuggestionsGenerator generator = new SuggestionsGenerator(searcher.getIndexReader(), response, suggestion); 52 | DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), suggestion.getField(), generator, spare); 53 | return response; 54 | } 55 | 56 | @Override 57 | protected Suggest.Suggestion> emptySuggestion( 58 | String name, KeyboardLayoutSuggestionContext suggestion, CharsRefBuilder spare) throws IOException { 59 | 60 | 
KeyboardLayoutSuggestion layoutSuggestion = new KeyboardLayoutSuggestion(name, suggestion.getSize()); 61 | List tokens = queryTerms(suggestion, spare); 62 | for (Token token : tokens) { 63 | Text key = new Text(new BytesArray(token.term.bytes())); 64 | KeyboardLayoutSuggestion.Entry resultEntry = new KeyboardLayoutSuggestion.Entry( 65 | key, token.startOffset, token.endOffset - token.startOffset); 66 | layoutSuggestion.addTerm(resultEntry); 67 | } 68 | return layoutSuggestion; 69 | } 70 | 71 | private static List queryTerms(SuggestionSearchContext.SuggestionContext suggestion, 72 | CharsRefBuilder spare) throws IOException { 73 | final List result = new ArrayList<>(); 74 | final String field = suggestion.getField(); 75 | DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, 76 | new DirectCandidateGenerator.TokenConsumer() { 77 | @Override 78 | public void nextToken() { 79 | Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder()))); 80 | result.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset())); 81 | } 82 | }, spare); 83 | return result; 84 | } 85 | 86 | private static class Token { 87 | 88 | public final Term term; 89 | public final int startOffset; 90 | public final int endOffset; 91 | 92 | private Token(Term term, int startOffset, int endOffset) { 93 | this.term = term; 94 | this.startOffset = startOffset; 95 | this.endOffset = endOffset; 96 | } 97 | 98 | } 99 | 100 | static class SuggestionsGenerator extends DirectCandidateGenerator.TokenConsumer { 101 | 102 | final IndexReader ir; 103 | final String field; 104 | final KeyboardSwitcher switcher; 105 | final KeyboardLayoutSuggestion acc; 106 | final double minFreq; 107 | final double maxFreq; 108 | final boolean lowercaseToken; 109 | final boolean preserveCase; 110 | final boolean addOriginal; 111 | 112 | private SuggestionsGenerator(IndexReader ir, KeyboardLayoutSuggestion acc, KeyboardLayoutSuggestionContext context) { 113 | 
this.ir = ir; 114 | this.acc = acc; 115 | this.field = context.getField(); 116 | this.switcher = context.switcher; 117 | this.minFreq = context.minFreq; 118 | this.maxFreq = context.maxFreq; 119 | this.lowercaseToken = context.lowercaseToken; 120 | this.preserveCase = context.preserveCase; 121 | this.addOriginal = context.addOriginal; 122 | } 123 | 124 | @Override 125 | public void nextToken() throws IOException { 126 | 127 | BytesRef originalRef = BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())); 128 | KeyboardLayoutSuggestion.Entry suggestion = newEntry(originalRef, offsetAttr); 129 | 130 | Term originalTerm = new Term(field, originalRef); 131 | 132 | String token = originalTerm.text(); 133 | 134 | int length = token.length(); 135 | char[] tokenChars = token.toCharArray(); 136 | char[] tokenCharsCased = lowercaseToken 137 | ? token.toLowerCase(Locale.ROOT).toCharArray() 138 | : tokenChars; 139 | 140 | char[] tokenCharsCasedAndSwitched = switcher.switchLayout(tokenCharsCased, 0, length, false); 141 | 142 | if (!Arrays.equals(tokenCharsCased, tokenCharsCasedAndSwitched)) { 143 | 144 | BytesRef switchedFreqCountingRef = toBytesRef(tokenCharsCasedAndSwitched); 145 | int docFreq = ir.docFreq(new Term(field, switchedFreqCountingRef)); 146 | double maxDoc = ir.maxDoc(); 147 | 148 | if (isNormalFreq(maxDoc, docFreq)) { 149 | 150 | BytesRef optionValueRef = lowercaseToken && preserveCase 151 | ? toBytesRef(switcher.switchLayout(tokenChars, 0, length, false)) 152 | : switchedFreqCountingRef; 153 | 154 | suggestion.addOption(newSwitchedOption(optionValueRef, docFreq)); 155 | 156 | if (addOriginal) { 157 | int originalCasedFreq = ir.docFreq( 158 | lowercaseToken ? 
new Term(field, toBytesRef(tokenCharsCased)) : originalTerm 159 | ); 160 | suggestion.addOption(newOriginalOption(originalRef, originalCasedFreq)); 161 | } 162 | 163 | } 164 | } 165 | 166 | acc.addTerm(suggestion); 167 | } 168 | 169 | 170 | private boolean isNormalFreq(double maxDoc, int docFreq) { 171 | return docFreq > 0 && 172 | // skip low freq terms 173 | (minFreq >= 1f && docFreq >= minFreq || docFreq >= Math.ceil(minFreq * maxDoc)) && 174 | // skip high freq terms 175 | (maxFreq == -1 || maxFreq >= 1f && docFreq <= maxDoc || docFreq <= Math.ceil(maxFreq * maxDoc)); 176 | } 177 | } 178 | 179 | private static BytesRef toBytesRef(char[] chars) { 180 | BytesRefBuilder builder = new BytesRefBuilder(); 181 | builder.copyChars(chars, 0, chars.length); 182 | return BytesRef.deepCopyOf(builder.get()); 183 | } 184 | 185 | private static KeyboardLayoutSuggestion.Entry.Option newSwitchedOption(BytesRef ref, int freq) { 186 | return new KeyboardLayoutSuggestion.Entry.Option(new Text(new BytesArray(ref)), freq, true); 187 | } 188 | 189 | private static KeyboardLayoutSuggestion.Entry.Option newOriginalOption(BytesRef ref, int freq) { 190 | return new KeyboardLayoutSuggestion.Entry.Option(new Text(new BytesArray(ref)), freq, false); 191 | } 192 | 193 | private static KeyboardLayoutSuggestion.Entry newEntry(BytesRef bytes, OffsetAttribute offsetAttr) { 194 | return new KeyboardLayoutSuggestion.Entry(new Text(new BytesArray(bytes)), offsetAttr.startOffset(), 195 | offsetAttr.endOffset() - offsetAttr.startOffset()); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /licenses/lucene-NOTICE.txt: -------------------------------------------------------------------------------- 1 | Apache Lucene 2 | Copyright 2014 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 
6 | 7 | Includes software from other Apache Software Foundation projects, 8 | including, but not limited to: 9 | - Apache Ant 10 | - Apache Jakarta Regexp 11 | - Apache Commons 12 | - Apache Xerces 13 | 14 | ICU4J, (under analysis/icu) is licensed under an MIT styles license 15 | and Copyright (c) 1995-2008 International Business Machines Corporation and others 16 | 17 | Some data files (under analysis/icu/src/data) are derived from Unicode data such 18 | as the Unicode Character Database. See http://unicode.org/copyright.html for more 19 | details. 20 | 21 | Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is 22 | BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/ 23 | 24 | The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were 25 | automatically generated with the moman/finenight FSA library, created by 26 | Jean-Philippe Barrette-LaPierre. This library is available under an MIT license, 27 | see http://sites.google.com/site/rrettesite/moman and 28 | http://bitbucket.org/jpbarrette/moman/overview/ 29 | 30 | The class org.apache.lucene.util.WeakIdentityMap was derived from 31 | the Apache CXF project and is Apache License 2.0. 32 | 33 | The Google Code Prettify is Apache License 2.0. 34 | See http://code.google.com/p/google-code-prettify/ 35 | 36 | JUnit (junit-4.10) is licensed under the Common Public License v. 1.0 37 | See http://junit.sourceforge.net/cpl-v10.html 38 | 39 | This product includes code (JaspellTernarySearchTrie) from Java Spelling Checkin 40 | g Package (jaspell): http://jaspell.sourceforge.net/ 41 | License: The BSD License (http://www.opensource.org/licenses/bsd-license.php) 42 | 43 | The snowball stemmers in 44 | analysis/common/src/java/net/sf/snowball 45 | were developed by Martin Porter and Richard Boulton. 
46 | The snowball stopword lists in 47 | analysis/common/src/resources/org/apache/lucene/analysis/snowball 48 | were developed by Martin Porter and Richard Boulton. 49 | The full snowball package is available from 50 | http://snowball.tartarus.org/ 51 | 52 | The KStem stemmer in 53 | analysis/common/src/org/apache/lucene/analysis/en 54 | was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst) 55 | under the BSD-license. 56 | 57 | The Arabic,Persian,Romanian,Bulgarian, Hindi and Bengali analyzers (common) come with a default 58 | stopword list that is BSD-licensed created by Jacques Savoy. These files reside in: 59 | analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt, 60 | analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt, 61 | analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt, 62 | analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt, 63 | analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt, 64 | analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt 65 | See http://members.unine.ch/jacques.savoy/clef/index.html. 66 | 67 | The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers 68 | (common) are based on BSD-licensed reference implementations created by Jacques Savoy and 69 | Ljiljana Dolamic. 
These files reside in: 70 | analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java 71 | analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java 72 | analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java 73 | analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java 74 | analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java 75 | analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java 76 | analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java 77 | analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java 78 | analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java 79 | analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java 80 | analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java 81 | 82 | The Stempel analyzer (stempel) includes BSD-licensed software developed 83 | by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil, 84 | and Edmond Nolan. 85 | 86 | The Polish analyzer (stempel) comes with a default 87 | stopword list that is BSD-licensed created by the Carrot2 project. The file resides 88 | in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt. 89 | See http://project.carrot2.org/license.html. 90 | 91 | The SmartChineseAnalyzer source code (smartcn) was 92 | provided by Xiaoping Gao and copyright 2009 by www.imdict.net. 93 | 94 | WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/) 95 | is derived from Unicode data such as the Unicode Character Database. 96 | See http://unicode.org/copyright.html for more details. 97 | 98 | The Morfologik analyzer (morfologik) includes BSD-licensed software 99 | developed by Dawid Weiss and Marcin Miłkowski (http://morfologik.blogspot.com/). 
100 | 101 | Morfologik uses data from Polish ispell/myspell dictionary 102 | (http://www.sjp.pl/slownik/en/) licenced on the terms of (inter alia) 103 | LGPL and Creative Commons ShareAlike. 104 | 105 | Morfologic includes data from BSD-licensed dictionary of Polish (SGJP) 106 | (http://sgjp.pl/morfeusz/) 107 | 108 | Servlet-api.jar and javax.servlet-*.jar are under the CDDL license, the original 109 | source code for this can be found at http://www.eclipse.org/jetty/downloads.php 110 | 111 | =========================================================================== 112 | Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration 113 | =========================================================================== 114 | 115 | This software includes a binary and/or source version of data from 116 | 117 | mecab-ipadic-2.7.0-20070801 118 | 119 | which can be obtained from 120 | 121 | http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz 122 | 123 | or 124 | 125 | http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz 126 | 127 | =========================================================================== 128 | mecab-ipadic-2.7.0-20070801 Notice 129 | =========================================================================== 130 | 131 | Nara Institute of Science and Technology (NAIST), 132 | the copyright holders, disclaims all warranties with regard to this 133 | software, including all implied warranties of merchantability and 134 | fitness, in no event shall NAIST be liable for 135 | any special, indirect or consequential damages or any damages 136 | whatsoever resulting from loss of use, data or profits, whether in an 137 | action of contract, negligence or other tortuous action, arising out 138 | of or in connection with the use or performance of this software. 139 | 140 | A large portion of the dictionary entries 141 | originate from ICOT Free Software. 
The following conditions for ICOT 142 | Free Software applies to the current dictionary as well. 143 | 144 | Each User may also freely distribute the Program, whether in its 145 | original form or modified, to any third party or parties, PROVIDED 146 | that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear 147 | on, or be attached to, the Program, which is distributed substantially 148 | in the same form as set out herein and that such intended 149 | distribution, if actually made, will neither violate or otherwise 150 | contravene any of the laws and regulations of the countries having 151 | jurisdiction over the User or the intended distribution itself. 152 | 153 | NO WARRANTY 154 | 155 | The program was produced on an experimental basis in the course of the 156 | research and development conducted during the project and is provided 157 | to users as so produced on an experimental basis. Accordingly, the 158 | program is provided without any warranty whatsoever, whether express, 159 | implied, statutory or otherwise. The term "warranty" used herein 160 | includes, but is not limited to, any warranty of the quality, 161 | performance, merchantability and fitness for a particular purpose of 162 | the program and the nonexistence of any infringement or violation of 163 | any right of any third party. 164 | 165 | Each user of the program will agree and understand, and be deemed to 166 | have agreed and understood, that there is no warranty whatsoever for 167 | the program and, accordingly, the entire risk arising from or 168 | otherwise connected with the program is assumed by the user. 
169 | 170 | Therefore, neither ICOT, the copyright holder, or any other 171 | organization that participated in or was otherwise related to the 172 | development of the program and their respective officials, directors, 173 | officers and other employees shall be held liable for any and all 174 | damages, including, without limitation, general, special, incidental 175 | and consequential damages, arising out of or otherwise in connection 176 | with the use or inability to use the program or any product, material 177 | or result produced or otherwise obtained by using the program, 178 | regardless of whether they have been advised of, or otherwise had 179 | knowledge of, the possibility of such damages at any time during the 180 | project or thereafter. Each user will be deemed to have agreed to the 181 | foregoing by his or her commencement of use of the program. The term 182 | "use" as used herein includes, but is not limited to, the use, 183 | modification, copying and distribution of the program and the 184 | production of secondary products from the program. 185 | 186 | In the case where the program, whether in its original form or 187 | modified, was distributed or delivered to or received by a user from 188 | any person, organization or entity other than ICOT, unless it makes or 189 | grants independently of ICOT any specific warranty to the user in 190 | writing, such person, organization or entity, will also be exempted 191 | from and not be held liable to the user for any such damages as noted 192 | above as far as the program is concerned. 
193 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/search/suggest/keyboard/KeyboardLayoutSuggestionBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Nikolay Papakha 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.elasticsearch.search.suggest.keyboard; 17 | 18 | import com.github.papahigh.keyboardswitcher.KeyboardSwitcherProvider; 19 | import org.elasticsearch.ElasticsearchParseException; 20 | import org.elasticsearch.common.ParseField; 21 | import org.elasticsearch.common.ParsingException; 22 | import org.elasticsearch.common.io.stream.StreamInput; 23 | import org.elasticsearch.common.io.stream.StreamOutput; 24 | import org.elasticsearch.common.xcontent.XContentBuilder; 25 | import org.elasticsearch.common.xcontent.XContentParser; 26 | import org.elasticsearch.index.query.QueryShardContext; 27 | import org.elasticsearch.search.suggest.SuggestionBuilder; 28 | import org.elasticsearch.search.suggest.SuggestionSearchContext; 29 | 30 | import java.io.IOException; 31 | import java.util.Objects; 32 | 33 | 34 | public final class KeyboardLayoutSuggestionBuilder extends SuggestionBuilder { 35 | 36 | public static final String SUGGESTION_NAME = "keyboard_layout"; 37 | 38 | private static final ParseField LANGUAGE_FIELD = new ParseField("language"); 39 | private static final 
ParseField MAX_FREQ_FIELD = new ParseField("max_freq"); 40 | private static final ParseField MIN_FREQ_FIELD = new ParseField("min_freq"); 41 | private static final ParseField LOWERCASE_TOKEN_FIELD = new ParseField("lowercase_token"); 42 | private static final ParseField ADD_ORIGINAL_FIELD = new ParseField("add_original"); 43 | private static final ParseField PRESERVE_CASE_FIELD = new ParseField("preserve_case"); 44 | 45 | private String language; 46 | private double minFreq = 0d; 47 | private double maxFreq = -1d; 48 | private boolean lowercaseToken = false; 49 | private boolean addOriginal = false; 50 | private boolean preserveCase = false; 51 | 52 | private KeyboardLayoutSuggestionBuilder(String field) { 53 | super(field); 54 | } 55 | 56 | public KeyboardLayoutSuggestionBuilder(StreamInput in) throws IOException { 57 | super(in); 58 | language = in.readString(); 59 | minFreq = in.readDouble(); 60 | maxFreq = in.readDouble(); 61 | lowercaseToken = in.readBoolean(); 62 | preserveCase = in.readBoolean(); 63 | addOriginal = in.readBoolean(); 64 | } 65 | 66 | private KeyboardLayoutSuggestionBuilder(String field, KeyboardLayoutSuggestionBuilder in) { 67 | super(field); 68 | language = in.language; 69 | analyzer = in.analyzer; 70 | text = in.text; 71 | minFreq = in.minFreq; 72 | maxFreq = in.maxFreq; 73 | lowercaseToken = in.lowercaseToken; 74 | preserveCase = in.preserveCase; 75 | addOriginal = in.addOriginal; 76 | } 77 | 78 | @Override 79 | protected void doWriteTo(StreamOutput out) throws IOException { 80 | out.writeString(language); 81 | out.writeDouble(minFreq); 82 | out.writeDouble(maxFreq); 83 | out.writeBoolean(lowercaseToken); 84 | out.writeBoolean(preserveCase); 85 | out.writeBoolean(addOriginal); 86 | } 87 | 88 | @Override 89 | protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { 90 | builder.field(LANGUAGE_FIELD.getPreferredName(), language); 91 | builder.field(MIN_FREQ_FIELD.getPreferredName(), minFreq); 92 
| builder.field(MAX_FREQ_FIELD.getPreferredName(), maxFreq); 93 | builder.field(LOWERCASE_TOKEN_FIELD.getPreferredName(), lowercaseToken); 94 | builder.field(PRESERVE_CASE_FIELD.getPreferredName(), preserveCase); 95 | builder.field(ADD_ORIGINAL_FIELD.getPreferredName(), addOriginal); 96 | return builder; 97 | } 98 | 99 | 100 | @Override 101 | protected SuggestionSearchContext.SuggestionContext build(QueryShardContext context) { 102 | KeyboardLayoutSuggestionContext suggestionContext = new KeyboardLayoutSuggestionContext(context, 103 | KeyboardSwitcherProvider.provide(language), minFreq, maxFreq, lowercaseToken, preserveCase, addOriginal); 104 | populateCommonFields(context.getMapperService(), suggestionContext); 105 | return suggestionContext; 106 | } 107 | 108 | @Override 109 | public String getWriteableName() { 110 | return SUGGESTION_NAME; 111 | } 112 | 113 | @Override 114 | protected boolean doEquals(KeyboardLayoutSuggestionBuilder other) { 115 | return Objects.equals(language, other.language) && 116 | Objects.equals(minFreq, other.minFreq) && 117 | Objects.equals(maxFreq, other.maxFreq) && 118 | Objects.equals(lowercaseToken, other.lowercaseToken) && 119 | Objects.equals(preserveCase, other.preserveCase) && 120 | Objects.equals(addOriginal, other.addOriginal); 121 | } 122 | 123 | @Override 124 | protected int doHashCode() { 125 | return Objects.hash(language, minFreq, maxFreq, lowercaseToken, preserveCase, addOriginal); 126 | } 127 | 128 | private void minFreq(double minFreq) { 129 | if (minFreq < 0.0d) { 130 | throw new IllegalArgumentException("minDocFreq must be positive"); 131 | } 132 | if (minFreq > 1.0d && minFreq != Math.floor(minFreq)) { 133 | throw new IllegalArgumentException("if minDocFreq is greater than 1, it must not be a fraction"); 134 | } 135 | this.minFreq = minFreq; 136 | } 137 | 138 | private void maxFreq(double maxFreq) { 139 | if (maxFreq < 0.0d) { 140 | throw new IllegalArgumentException("maxFreq must be positive"); 141 | } 142 | if 
(maxFreq > 1.0d && maxFreq != Math.floor(maxFreq)) { 143 | throw new IllegalArgumentException("if maxFreq is greater than 1, it must not be a fraction"); 144 | } 145 | this.maxFreq = maxFreq; 146 | } 147 | 148 | private void lowercaseToken(boolean lowercaseToken) { 149 | this.lowercaseToken = lowercaseToken; 150 | } 151 | 152 | private void addOriginal(boolean addOriginal) { 153 | this.addOriginal = addOriginal; 154 | } 155 | 156 | private void preserveCase(boolean preserveCase) { 157 | this.preserveCase = preserveCase; 158 | } 159 | 160 | private void language(String language) { 161 | this.language = language; 162 | } 163 | 164 | public static KeyboardLayoutSuggestionBuilder fromXContent(XContentParser parser) throws IOException { 165 | 166 | KeyboardLayoutSuggestionBuilder tmpValuesHolder = new KeyboardLayoutSuggestionBuilder("_na_"); 167 | tmpValuesHolder.analyzer("keyboard_analyzer"); 168 | 169 | XContentParser.Token token; 170 | String currentFieldName = ""; 171 | String fieldName = null; 172 | 173 | while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { 174 | if (token == XContentParser.Token.FIELD_NAME) { 175 | currentFieldName = parser.currentName(); 176 | } else if (token.isValue()) { 177 | if (FIELDNAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 178 | fieldName = parser.text(); 179 | } else if (ANALYZER_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 180 | tmpValuesHolder.analyzer(parser.text()); 181 | } else if (TEXT_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 182 | tmpValuesHolder.text(parser.text()); 183 | } else if (SIZE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 184 | tmpValuesHolder.size(parser.intValue()); 185 | } else if (SHARDSIZE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 186 | tmpValuesHolder.shardSize(parser.intValue()); 187 | } else if (MAX_FREQ_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 188 | 
tmpValuesHolder.maxFreq(parser.doubleValue()); 189 | } else if (MIN_FREQ_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 190 | tmpValuesHolder.minFreq(parser.doubleValue()); 191 | } else if (ADD_ORIGINAL_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 192 | tmpValuesHolder.addOriginal(parser.booleanValue()); 193 | } else if (LOWERCASE_TOKEN_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 194 | tmpValuesHolder.lowercaseToken(parser.booleanValue()); 195 | } else if (PRESERVE_CASE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 196 | tmpValuesHolder.preserveCase(parser.booleanValue()); 197 | } else if (LANGUAGE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { 198 | tmpValuesHolder.language(parser.text()); 199 | } else { 200 | throw new ParsingException(parser.getTokenLocation(), 201 | "suggester[" + SUGGESTION_NAME + "] doesn't support field [" + currentFieldName + "]"); 202 | } 203 | 204 | } else { 205 | throw new ParsingException(parser.getTokenLocation(), "suggester[" + SUGGESTION_NAME + " ] " + 206 | "parsing failed on [" + currentFieldName + "]"); 207 | } 208 | } 209 | 210 | if (fieldName == null) { 211 | throw new ElasticsearchParseException( 212 | "the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing"); 213 | } 214 | 215 | return new KeyboardLayoutSuggestionBuilder(fieldName, tmpValuesHolder); 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 
12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /licenses/commons-codec-LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /licenses/lucene-LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | 205 | 206 | Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was 207 | derived from unicode conversion examples available at 208 | http://www.unicode.org/Public/PROGRAMS/CVTUTF. Here is the copyright 209 | from those sources: 210 | 211 | /* 212 | * Copyright 2001-2004 Unicode, Inc. 213 | * 214 | * Disclaimer 215 | * 216 | * This source code is provided as is by Unicode, Inc. No claims are 217 | * made as to fitness for any particular purpose. No warranties of any 218 | * kind are expressed or implied. The recipient agrees to determine 219 | * applicability of information provided. If this file has been 220 | * purchased on magnetic or optical media from Unicode, Inc., the 221 | * sole remedy for any claim will be exchange of defective media 222 | * within 90 days of receipt. 223 | * 224 | * Limitations on Rights to Redistribute This Code 225 | * 226 | * Unicode, Inc. 
hereby grants the right to freely use the information 227 | * supplied in this file in the creation of products supporting the 228 | * Unicode Standard, and to make copies of this file in any form 229 | * for internal or external distribution as long as this notice 230 | * remains attached. 231 | */ 232 | 233 | 234 | Some code in core/src/java/org/apache/lucene/util/ArrayUtil.java was 235 | derived from Python 2.4.2 sources available at 236 | http://www.python.org. Full license is here: 237 | 238 | http://www.python.org/download/releases/2.4.2/license/ 239 | 240 | Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was 241 | derived from Python 3.1.2 sources available at 242 | http://www.python.org. Full license is here: 243 | 244 | http://www.python.org/download/releases/3.1.2/license/ 245 | 246 | Some code in core/src/java/org/apache/lucene/util/automaton was 247 | derived from Brics automaton sources available at 248 | www.brics.dk/automaton/. Here is the copyright from those sources: 249 | 250 | /* 251 | * Copyright (c) 2001-2009 Anders Moeller 252 | * All rights reserved. 253 | * 254 | * Redistribution and use in source and binary forms, with or without 255 | * modification, are permitted provided that the following conditions 256 | * are met: 257 | * 1. Redistributions of source code must retain the above copyright 258 | * notice, this list of conditions and the following disclaimer. 259 | * 2. Redistributions in binary form must reproduce the above copyright 260 | * notice, this list of conditions and the following disclaimer in the 261 | * documentation and/or other materials provided with the distribution. 262 | * 3. The name of the author may not be used to endorse or promote products 263 | * derived from this software without specific prior written permission. 
264 | * 265 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 266 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 267 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 268 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 269 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 270 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 271 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 272 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 273 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 274 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 275 | */ 276 | 277 | The levenshtein automata tables in core/src/java/org/apache/lucene/util/automaton 278 | were automatically generated with the moman/finenight FSA package. 279 | Here is the copyright for those sources: 280 | 281 | # Copyright (c) 2010, Jean-Philippe Barrette-LaPierre, <jpb@rrette.com> 282 | # 283 | # Permission is hereby granted, free of charge, to any person 284 | # obtaining a copy of this software and associated documentation 285 | # files (the "Software"), to deal in the Software without 286 | # restriction, including without limitation the rights to use, 287 | # copy, modify, merge, publish, distribute, sublicense, and/or sell 288 | # copies of the Software, and to permit persons to whom the 289 | # Software is furnished to do so, subject to the following 290 | # conditions: 291 | # 292 | # The above copyright notice and this permission notice shall be 293 | # included in all copies or substantial portions of the Software. 294 | # 295 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 296 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 297 | # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 298 | # NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 299 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 300 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 301 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 302 | # OTHER DEALINGS IN THE SOFTWARE. 303 | 304 | Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was 305 | derived from ICU (http://www.icu-project.org) 306 | The full license is available here: 307 | http://source.icu-project.org/repos/icu/icu/trunk/license.html 308 | 309 | /* 310 | * Copyright (C) 1999-2010, International Business Machines 311 | * Corporation and others. All Rights Reserved. 312 | * 313 | * Permission is hereby granted, free of charge, to any person obtaining a copy 314 | * of this software and associated documentation files (the "Software"), to deal 315 | * in the Software without restriction, including without limitation the rights 316 | * to use, copy, modify, merge, publish, distribute, and/or sell copies of the 317 | * Software, and to permit persons to whom the Software is furnished to do so, 318 | * provided that the above copyright notice(s) and this permission notice appear 319 | * in all copies of the Software and that both the above copyright notice(s) and 320 | * this permission notice appear in supporting documentation. 321 | * 322 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 323 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 324 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
325 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE 326 | * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR 327 | * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 328 | * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 329 | * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 330 | * 331 | * Except as contained in this notice, the name of a copyright holder shall not 332 | * be used in advertising or otherwise to promote the sale, use or other 333 | * dealings in this Software without prior written authorization of the 334 | * copyright holder. 335 | */ 336 | 337 | The following license applies to the Snowball stemmers: 338 | 339 | Copyright (c) 2001, Dr Martin Porter 340 | Copyright (c) 2002, Richard Boulton 341 | All rights reserved. 342 | 343 | Redistribution and use in source and binary forms, with or without 344 | modification, are permitted provided that the following conditions are met: 345 | 346 | * Redistributions of source code must retain the above copyright notice, 347 | * this list of conditions and the following disclaimer. 348 | * Redistributions in binary form must reproduce the above copyright 349 | * notice, this list of conditions and the following disclaimer in the 350 | * documentation and/or other materials provided with the distribution. 351 | * Neither the name of the copyright holders nor the names of its contributors 352 | * may be used to endorse or promote products derived from this software 353 | * without specific prior written permission. 354 | 355 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 356 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 357 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 358 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 359 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 360 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 361 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 362 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 363 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 364 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 365 | 366 | The following license applies to the KStemmer: 367 | 368 | Copyright © 2003, 369 | Center for Intelligent Information Retrieval, 370 | University of Massachusetts, Amherst. 371 | All rights reserved. 372 | 373 | Redistribution and use in source and binary forms, with or without modification, 374 | are permitted provided that the following conditions are met: 375 | 376 | 1. Redistributions of source code must retain the above copyright notice, this 377 | list of conditions and the following disclaimer. 378 | 379 | 2. Redistributions in binary form must reproduce the above copyright notice, 380 | this list of conditions and the following disclaimer in the documentation 381 | and/or other materials provided with the distribution. 382 | 383 | 3. The names "Center for Intelligent Information Retrieval" and 384 | "University of Massachusetts" must not be used to endorse or promote products 385 | derived from this software without prior written permission. To obtain 386 | permission, contact info@ciir.cs.umass.edu. 387 | 388 | THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS 389 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 390 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 391 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 392 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 393 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 394 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 395 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 396 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 397 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 398 | SUCH DAMAGE. 399 | 400 | The following license applies to the Morfologik project: 401 | 402 | Copyright (c) 2006 Dawid Weiss 403 | Copyright (c) 2007-2011 Dawid Weiss, Marcin Miłkowski 404 | All rights reserved. 405 | 406 | Redistribution and use in source and binary forms, with or without modification, 407 | are permitted provided that the following conditions are met: 408 | 409 | * Redistributions of source code must retain the above copyright notice, 410 | this list of conditions and the following disclaimer. 411 | 412 | * Redistributions in binary form must reproduce the above copyright notice, 413 | this list of conditions and the following disclaimer in the documentation 414 | and/or other materials provided with the distribution. 415 | 416 | * Neither the name of Morfologik nor the names of its contributors 417 | may be used to endorse or promote products derived from this software 418 | without specific prior written permission. 419 | 420 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 421 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 422 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 423 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 424 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 425 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 426 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 427 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 428 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 429 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 430 | 431 | --- 432 | 433 | The dictionary comes from Morfologik project. Morfologik uses data from 434 | Polish ispell/myspell dictionary hosted at http://www.sjp.pl/slownik/en/ and 435 | is licenced on the terms of (inter alia) LGPL and Creative Commons 436 | ShareAlike. The part-of-speech tags were added in Morfologik project and 437 | are not found in the data from sjp.pl. The tagset is similar to IPI PAN 438 | tagset. 439 | 440 | --- 441 | 442 | The following license applies to the Morfeusz project, 443 | used by org.apache.lucene.analysis.morfologik. 444 | 445 | BSD-licensed dictionary of Polish (SGJP) 446 | http://sgjp.pl/morfeusz/ 447 | 448 | Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, 449 | Marcin Woliński, Robert Wołosz 450 | 451 | All rights reserved. 452 | 453 | Redistribution and use in source and binary forms, with or without 454 | modification, are permitted provided that the following conditions are 455 | met: 456 | 457 | 1. Redistributions of source code must retain the above copyright 458 | notice, this list of conditions and the following disclaimer. 459 | 460 | 2. Redistributions in binary form must reproduce the above copyright 461 | notice, this list of conditions and the following disclaimer in the 462 | documentation and/or other materials provided with the 463 | distribution. 
464 | 465 | THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS 466 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 467 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 468 | DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE 469 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 470 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 471 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 472 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 473 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 474 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 475 | IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 476 | --------------------------------------------------------------------------------