├── .gitignore ├── analyzer-test └── src │ ├── main │ └── java │ │ └── com │ │ └── galerieslafayette │ │ └── analyzer │ │ ├── document │ │ ├── SearchEntity.java │ │ ├── SearchResponse.java │ │ ├── Hits.java │ │ ├── SearchHits.java │ │ └── MyDocument.java │ │ └── client │ │ ├── BodyStringEncoder.java │ │ └── ESClient.java │ └── test │ └── resources │ ├── log4j.properties │ ├── my_type_mapping.json │ ├── my_type_mapping_old.json │ └── my_index_settings.json ├── es51X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es52X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es53X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com 
│ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es54X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es55X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es56X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es60X ├── src │ ├── main │ │ ├── resources 
│ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es61X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es62X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ ├── BenchmarkIT.java │ │ │ └── FrenchPhoneticBenchmark.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es63X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java 
│ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ └── BenchmarkIT.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es64X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ └── BenchmarkIT.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── es65X ├── src │ ├── main │ │ ├── resources │ │ │ └── plugin-descriptor.properties │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ ├── plugin │ │ │ └── analysis │ │ │ │ └── AnalysisFrenchPhonetic.java │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FrenchPhoneticTokenFilterFactory.java │ │ │ └── FrenchPhoneticAnalyzer.java │ ├── test │ │ └── java │ │ │ └── com │ │ │ └── galerieslafayette │ │ │ └── index │ │ │ └── analysis │ │ │ ├── FakeTokenStream.java │ │ │ └── BenchmarkIT.java │ └── assembly │ │ └── plugin.xml └── pom.xml ├── analyzer-core ├── pom.xml └── src │ └── test │ └── java │ └── com │ └── galerieslafayette │ └── analyzer │ └── EncoderTest.java └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.iml 3 | .idea 4 | .classpath 5 | .project 6 | .settings/ 7 | target/ 8 | **/*.jmh.json 9 | -------------------------------------------------------------------------------- /analyzer-test/src/main/java/com/galerieslafayette/analyzer/document/SearchEntity.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.analyzer.document; 2 | 3 | public enum 
SearchEntity { 4 | USER; 5 | } 6 | -------------------------------------------------------------------------------- /analyzer-test/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger = ALL, Console 2 | log4j.appender.Console=org.apache.log4j.ConsoleAppender 3 | log4j.appender.Console.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.Console.layout.conversionPattern=%m%n -------------------------------------------------------------------------------- /es51X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=5.1.${es51X.version} -------------------------------------------------------------------------------- /es52X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=5.2.${es52X.version} -------------------------------------------------------------------------------- /es53X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 
2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=5.3.${es53X.version} -------------------------------------------------------------------------------- /es54X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=5.4.${es54X.version} -------------------------------------------------------------------------------- /es55X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=5.5.${es55X.version} -------------------------------------------------------------------------------- /es56X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=5.6.${es56X.version} -------------------------------------------------------------------------------- /es60X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 
2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=6.0.${es60X.version} -------------------------------------------------------------------------------- /es61X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=6.1.${es61X.version} -------------------------------------------------------------------------------- /es62X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=6.2.${es62X.version} -------------------------------------------------------------------------------- /es63X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=6.3.${es63X.version} -------------------------------------------------------------------------------- /es64X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 
2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=6.4.${es64X.version} -------------------------------------------------------------------------------- /es65X/src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=${project.description}. 2 | version=${project.version} 3 | name=french-phonetic 4 | classname=com.galerieslafayette.plugin.analysis.AnalysisFrenchPhonetic 5 | java.version=1.8 6 | elasticsearch.version=6.5.${es65X.version} -------------------------------------------------------------------------------- /analyzer-test/src/test/resources/my_type_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "my_type": { 3 | "properties": { 4 | "id": { 5 | "type": "integer" 6 | }, 7 | "content": { 8 | "type": "text", 9 | "analyzer": "phonetic_index", 10 | "search_analyzer": "phonetic_search" 11 | } 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /analyzer-test/src/main/java/com/galerieslafayette/analyzer/document/SearchResponse.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.analyzer.document; 2 | 3 | public class SearchResponse { 4 | 5 | private Hits hits; 6 | 7 | public Hits getHits() { 8 | return hits; 9 | } 10 | 11 | public void setHits(Hits hits) { 12 | this.hits = hits; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /analyzer-test/src/test/resources/my_type_mapping_old.json: -------------------------------------------------------------------------------- 1 | { 2 | "my_type": { 3 | "properties": { 4 | "id": { 5 | "type": "integer" 6 | }, 7 | "content": { 8 | "type": "string", 9 | "index": "analyzed", 10 | "analyzer": 
"phonetic_index", 11 | "search_analyzer": "phonetic_search" 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /analyzer-test/src/main/java/com/galerieslafayette/analyzer/document/Hits.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.analyzer.document; 2 | 3 | public class Hits { 4 | 5 | private SearchHits[] hits; 6 | 7 | public SearchHits[] getHits() { 8 | return hits; 9 | } 10 | 11 | public void setHits(SearchHits[] hits) { 12 | this.hits = hits; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /es51X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es52X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es53X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package 
com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es54X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es55X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es56X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 
12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es60X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es61X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es62X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es63X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package 
com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es64X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /es65X/src/test/java/com/galerieslafayette/index/analysis/FakeTokenStream.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | 5 | import java.io.IOException; 6 | 7 | public final class FakeTokenStream extends TokenStream { 8 | 9 | @Override 10 | public final boolean incrementToken() throws IOException { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /analyzer-test/src/main/java/com/galerieslafayette/analyzer/document/SearchHits.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.analyzer.document; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | 5 | public class SearchHits { 6 | 7 | @JsonProperty("_source") 8 | private MyDocument source; 9 | 10 | public MyDocument getSource() { 11 | return source; 12 | } 13 | 14 | public 
void setSource(MyDocument source) { 15 | this.source = source; 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /analyzer-test/src/main/java/com/galerieslafayette/analyzer/document/MyDocument.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.analyzer.document; 2 | 3 | public class MyDocument { 4 | 5 | private Long id; 6 | private String content; 7 | 8 | public Long getId() { 9 | return id; 10 | } 11 | 12 | public void setId(Long id) { 13 | this.id = id; 14 | } 15 | 16 | public String getContent() { 17 | return content; 18 | } 19 | 20 | public void setContent(String content) { 21 | this.content = content; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /analyzer-test/src/main/java/com/galerieslafayette/analyzer/client/BodyStringEncoder.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.analyzer.client; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import feign.RequestTemplate; 5 | import feign.jackson.JacksonEncoder; 6 | 7 | import java.lang.reflect.Type; 8 | 9 | public class BodyStringEncoder extends JacksonEncoder { 10 | 11 | public BodyStringEncoder(ObjectMapper mapper) { 12 | super(mapper); 13 | } 14 | 15 | @Override 16 | public void encode(Object object, Type bodyType, RequestTemplate template) { 17 | if (String.class.equals(bodyType)) { 18 | template.body((String) object); 19 | } else { 20 | super.encode(object, bodyType, template); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /es51X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import 
com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory; 4 | import org.elasticsearch.index.analysis.TokenFilterFactory; 5 | import org.elasticsearch.plugins.AnalysisPlugin; 6 | import org.elasticsearch.plugins.Plugin; 7 | 8 | import java.util.Collections; 9 | import java.util.Map; 10 | 11 | import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 12 | 13 | public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin{ 14 | 15 | @Override 16 | public Map> getTokenFilters() { 17 | return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /es52X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory; 4 | import org.elasticsearch.index.analysis.TokenFilterFactory; 5 | import org.elasticsearch.plugins.AnalysisPlugin; 6 | import org.elasticsearch.plugins.Plugin; 7 | 8 | import java.util.Collections; 9 | import java.util.Map; 10 | 11 | import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 12 | 13 | public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin{ 14 | 15 | @Override 16 | public Map> getTokenFilters() { 17 | return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /es53X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import 
com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory; 4 | import org.elasticsearch.index.analysis.TokenFilterFactory; 5 | import org.elasticsearch.plugins.AnalysisPlugin; 6 | import org.elasticsearch.plugins.Plugin; 7 | 8 | import java.util.Collections; 9 | import java.util.Map; 10 | 11 | import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 12 | 13 | public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin{ 14 | 15 | @Override 16 | public Map> getTokenFilters() { 17 | return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /es54X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory; 4 | import org.elasticsearch.index.analysis.TokenFilterFactory; 5 | import org.elasticsearch.plugins.AnalysisPlugin; 6 | import org.elasticsearch.plugins.Plugin; 7 | 8 | import java.util.Collections; 9 | import java.util.Map; 10 | 11 | import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 12 | 13 | public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin{ 14 | 15 | @Override 16 | public Map> getTokenFilters() { 17 | return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /es55X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import 
com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory; 4 | import org.elasticsearch.index.analysis.TokenFilterFactory; 5 | import org.elasticsearch.plugins.AnalysisPlugin; 6 | import org.elasticsearch.plugins.Plugin; 7 | 8 | import java.util.Collections; 9 | import java.util.Map; 10 | 11 | import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 12 | 13 | public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin{ 14 | 15 | @Override 16 | public Map> getTokenFilters() { 17 | return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /es56X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory; 4 | import org.elasticsearch.index.analysis.TokenFilterFactory; 5 | import org.elasticsearch.plugins.AnalysisPlugin; 6 | import org.elasticsearch.plugins.Plugin; 7 | 8 | import java.util.Collections; 9 | import java.util.Map; 10 | 11 | import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 12 | 13 | public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin{ 14 | 15 | @Override 16 | public Map> getTokenFilters() { 17 | return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /es60X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import 
com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory; 4 | import org.elasticsearch.index.analysis.TokenFilterFactory; 5 | import org.elasticsearch.plugins.AnalysisPlugin; 6 | import org.elasticsearch.plugins.Plugin; 7 | 8 | import java.util.Collections; 9 | import java.util.Map; 10 | 11 | import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 12 | 13 | public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin{ 14 | 15 | @Override 16 | public Map> getTokenFilters() { 17 | return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /es61X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory; 4 | import org.elasticsearch.index.analysis.TokenFilterFactory; 5 | import org.elasticsearch.plugins.AnalysisPlugin; 6 | import org.elasticsearch.plugins.Plugin; 7 | 8 | import java.util.Collections; 9 | import java.util.Map; 10 | 11 | import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; 12 | 13 | public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin{ 14 | 15 | @Override 16 | public Map> getTokenFilters() { 17 | return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /es62X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.plugin.analysis; 2 | 3 | import 
com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

import java.util.Collections;
import java.util.Map;

import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;

/**
 * Elasticsearch plugin entry point that registers the {@code french_phonetic} token filter.
 */
public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin {

    /**
     * Exposes the token filters contributed by this plugin.
     *
     * @return a single-entry map binding the filter type name {@code french_phonetic}
     *         to the {@link FrenchPhoneticTokenFilterFactory} constructor
     */
    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new);
    }

}

-------------------------------------------------------------------------------- /es63X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: --------------------------------------------------------------------------------
package com.galerieslafayette.plugin.analysis;

import com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

import java.util.Collections;
import java.util.Map;

import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;

/**
 * Elasticsearch plugin entry point that registers the {@code french_phonetic} token filter.
 */
public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin {

    /**
     * Exposes the token filters contributed by this plugin.
     *
     * @return a single-entry map binding the filter type name {@code french_phonetic}
     *         to the {@link FrenchPhoneticTokenFilterFactory} constructor
     */
    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new);
    }

}

-------------------------------------------------------------------------------- /es64X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: --------------------------------------------------------------------------------
package com.galerieslafayette.plugin.analysis;

import
com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

import java.util.Collections;
import java.util.Map;

import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;

/**
 * Elasticsearch plugin entry point that registers the {@code french_phonetic} token filter.
 */
public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin {

    /**
     * Exposes the token filters contributed by this plugin.
     *
     * @return a single-entry map binding the filter type name {@code french_phonetic}
     *         to the {@link FrenchPhoneticTokenFilterFactory} constructor
     */
    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new);
    }

}

-------------------------------------------------------------------------------- /es65X/src/main/java/com/galerieslafayette/plugin/analysis/AnalysisFrenchPhonetic.java: --------------------------------------------------------------------------------
package com.galerieslafayette.plugin.analysis;

import com.galerieslafayette.index.analysis.FrenchPhoneticTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

import java.util.Collections;
import java.util.Map;

import static org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;

/**
 * Elasticsearch plugin entry point that registers the {@code french_phonetic} token filter.
 */
public class AnalysisFrenchPhonetic extends Plugin implements AnalysisPlugin {

    /**
     * Exposes the token filters contributed by this plugin.
     *
     * @return a single-entry map binding the filter type name {@code french_phonetic}
     *         to the {@link FrenchPhoneticTokenFilterFactory} constructor
     */
    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        return Collections.singletonMap("french_phonetic", FrenchPhoneticTokenFilterFactory::new);
    }

}

-------------------------------------------------------------------------------- /es63X/src/assembly/plugin.xml: --------------------------------------------------------------------------------
1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | . 12 | true 13 | 14 | 15 | 16 | 17 | 18 | .
19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es64X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | . 12 | true 13 | 14 | 15 | 16 | 17 | 18 | . 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es51X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es52X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es53X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | 
-------------------------------------------------------------------------------- /es54X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es55X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es56X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es60X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- 
/es61X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es62X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es65X/src/assembly/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${project.basedir}/src/main/resources/plugin-descriptor.properties 11 | elasticsearch 12 | true 13 | 14 | 15 | 16 | 17 | 18 | elasticsearch 19 | true 20 | true 21 | 22 | 23 | org.elasticsearch:elasticsearch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /es51X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.lucene.analysis.TokenStream; 4 | import org.elasticsearch.common.inject.Inject; 5 | import org.elasticsearch.common.inject.assistedinject.Assisted; 6 | import org.elasticsearch.common.settings.Settings; 7 | import org.elasticsearch.env.Environment; 8 | import org.elasticsearch.index.IndexSettings; 9 | 
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es52X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es53X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package
com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es54X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es55X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es56X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es60X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es61X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import
org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es62X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}
-------------------------------------------------------------------------------- /es63X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es64X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /es65X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticTokenFilterFactory.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchPhoneticAnalyzer}.
 */
public class FrenchPhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * Creates the factory.
     *
     * @param indexSettings forwarded to the parent factory
     * @param environment   accepted but not used by this constructor
     * @param name          forwarded to the parent factory
     * @param settings      forwarded to the parent factory
     */
    @Inject
    public FrenchPhoneticTokenFilterFactory(
            IndexSettings indexSettings,
            Environment environment,
            @Assisted String name,
            @Assisted Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream.
     *
     * @param tokenStream the stream to wrap
     * @return a new {@link FrenchPhoneticAnalyzer} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream phonetic = new FrenchPhoneticAnalyzer(tokenStream);
        return phonetic;
    }
}

-------------------------------------------------------------------------------- /analyzer-core/pom.xml: --------------------------------------------------------------------------------
1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | analyzer-core 13 | 14 | 15 | 16 | 17 | org.apache.maven.plugins 18 | maven-compiler-plugin 19 | 3.6.1 20 | 21 | 1.8 22 | 1.8 23 | UTF-8 24 | 25 | 26 | 27 | 28 | 29 | 30 |
-------------------------------------------------------------------------------- /analyzer-test/src/main/java/com/galerieslafayette/analyzer/client/ESClient.java: --------------------------------------------------------------------------------
package com.galerieslafayette.analyzer.client;

import com.galerieslafayette.analyzer.document.MyDocument;
import com.galerieslafayette.analyzer.document.SearchResponse;
import feign.Body;
import feign.Headers;
import feign.Param;
import feign.RequestLine;

import java.io.File;
import java.util.Map;

/**
 * Feign client describing the HTTP calls made against the {@code my_index} index.
 * Every request is sent with JSON {@code Content-Type} and {@code Accept} headers.
 */
@Headers({"Content-Type: application/json", "Accept: application/json"})
public interface ESClient {

    /**
     * PUTs a document under {@code /my_index/my_type/{id}} with {@code refresh=wait_for}.
     *
     * @param myDocument the request body
     * @param id         the value substituted for {@code {id}} in the request line
     */
    @RequestLine("PUT /my_index/my_type/{id}?refresh=wait_for")
    void addDocument(MyDocument myDocument, @Param("id") Long id);

    /**
     * POSTs a {@code match} query on the {@code content} field.
     * NOTE(review): {@code searchText} is substituted into the JSON template as given;
     * presumably callers must pass text that is already safe inside a JSON string - confirm.
     *
     * @param searchText the value substituted for {@code {searchText}} in the body template
     * @return the decoded search response
     */
    @RequestLine("POST /my_index/my_type/_search")
    @Body("%7B\"query\":%7B\"match\" : %7B\"content\" :%7B\"query\": \"{searchText}\"%7D%7D%7D%7D")
    SearchResponse search(@Param("searchText") String searchText);

    /** Sends {@code DELETE /my_index/my_type/*}. */
    @RequestLine("DELETE /my_index/my_type/*")
    void deleteAll();

    /**
     * Sends {@code PUT /my_index}.
     *
     * @param body the request body
     */
    @RequestLine("PUT /my_index")
    void createIndex(String body);

    /**
     * Sends {@code PUT /my_index/_mapping/my_type}.
     *
     * @param settings the request body
     */
    @RequestLine("PUT /my_index/_mapping/my_type")
    void applyMapping(String settings);
}

-------------------------------------------------------------------------------- /analyzer-test/src/test/resources/my_index_settings.json: --------------------------------------------------------------------------------
{
  "number_of_replicas":0,
  "number_of_shards": 1,
  "analysis": {
    "analyzer": {
      "phonetic_search": {
        "tokenizer": "text_only_tokenizer",
        "type": "custom",
        "filter": [
          "french_elision",
          "word_delimiter_helper",
          "lowercase",
          "unique",
          "french-phonetic"
        ]
      },
      "phonetic_index": {
        "tokenizer": "text_only_tokenizer",
        "type": "custom",
        "filter": [
          "french_elision",
          "word_delimiter_helper",
          "lowercase",
          "french-phonetic"
        ]
      }
    },
    "tokenizer": {
      "text_only_tokenizer": {
        "type": "pattern",
        "pattern": "[^a-z_A-Z\\u00C0-\\u00FF]"
      }
    },
    "filter": {
      "french_elision": {
        "type": "elision",
        "articles_case": true,
        "articles": [
          "l",
          "m",
          "t",
          "qu",
          "n",
          "s",
          "j",
          "d",
          "c",
          "jusqu",
          "quoiqu",
          "lorsqu",
          "puisqu"
        ]
      },
      "french-phonetic": {
        "type": "french_phonetic"
      },
      "word_delimiter_helper": {
        "catenate_all": false,
        "stem_english_possessive": false,
        "split_on_case_change": false,
        "type": "word_delimiter"
      }
    }
  }

}

-------------------------------------------------------------------------------- /es53X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.openjdk.jmh.results.RunResult;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;

import java.nio.file.Paths;
import java.util.Collection;

/**
 * Runs the JMH benchmarks matching {@code .*Benchmark} from a JUnit test and checks the
 * score of the {@code encodeStringFrenchPhonetic} benchmark against a fixed floor.
 */
public class BenchmarkIT {

    /**
     * Launches the benchmarks, writes the JSON report into the folder returned by
     * {@link #resolveTargetFolder()} and asserts a score above 6000 for every result
     * labelled {@code encodeStringFrenchPhonetic}. If no result carries that label,
     * nothing is asserted.
     *
     * @throws RunnerException if JMH fails to run the benchmarks
     */
    @Test
    public void launchBenchmark() throws RunnerException {

        String targetFolder = resolveTargetFolder();

        Options opt = new OptionsBuilder()
                .include(".*Benchmark")
                .warmupTime(TimeValue.seconds(1))
                .warmupIterations(5)
                .measurementTime(TimeValue.milliseconds(100))
                .measurementIterations(100)
                .threads(4)
                .forks(1)
                .shouldFailOnError(true)
                .shouldDoGC(true)
                .jvmArgs("-server")
                .resultFormat(ResultFormatType.JSON)
                .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json")
                .build();

        Collection<RunResult> run = new Runner(opt).run();
        run.stream()
                .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel()))
                .forEach(
                        runResult ->
                                Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000)
                );
    }

    /**
     * Returns the parent directory of the classpath root this class was loaded from.
     * The URL is converted through a URI because {@code URL#getFile()} keeps percent-escapes
     * (for example {@code %20}) and, on Windows, a leading {@code /C:} that {@code Paths.get}
     * rejects.
     */
    private String resolveTargetFolder() {
        try {
            return Paths.get(this.getClass().getResource("/").toURI()).getParent().toString();
        } catch (java.net.URISyntaxException e) {
            throw new IllegalStateException("Cannot resolve the classpath root of " + getClass().getName(), e);
        }
    }
}

-------------------------------------------------------------------------------- /es51X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.openjdk.jmh.results.RunResult;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;

import java.nio.file.Paths;
import java.util.Collection;

/**
 * Runs the JMH benchmarks matching {@code .*Benchmark} from a JUnit test and checks the
 * score of the {@code encodeStringFrenchPhonetic} benchmark against a fixed floor.
 */
public class BenchmarkIT {

    /**
     * Launches the benchmarks, writes the JSON report into the folder returned by
     * {@link #resolveTargetFolder()} and asserts a score above 6000 for every result
     * labelled {@code encodeStringFrenchPhonetic}. If no result carries that label,
     * nothing is asserted.
     *
     * @throws RunnerException if JMH fails to run the benchmarks
     */
    @Test
    public void launchBenchmark() throws RunnerException {

        String targetFolder = resolveTargetFolder();

        Options opt = new OptionsBuilder()
                .include(".*Benchmark")
                .warmupTime(TimeValue.seconds(1))
                .warmupIterations(5)
                .measurementTime(TimeValue.milliseconds(100))
                .measurementIterations(100)
                .threads(4)
                .forks(1)
                .shouldFailOnError(true)
                .shouldDoGC(true)
                .jvmArgs("-server")
                .resultFormat(ResultFormatType.JSON)
                .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json")
                .build();

        Collection<RunResult> run = new Runner(opt).run();
        run.stream()
                .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel()))
                .forEach(
                        runResult ->
                                Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000)
                );
    }

    /**
     * Returns the parent directory of the classpath root this class was loaded from.
     * The URL is converted through a URI because {@code URL#getFile()} keeps percent-escapes
     * (for example {@code %20}) and, on Windows, a leading {@code /C:} that {@code Paths.get}
     * rejects.
     */
    private String resolveTargetFolder() {
        try {
            return Paths.get(this.getClass().getResource("/").toURI()).getParent().toString();
        } catch (java.net.URISyntaxException e) {
            throw new IllegalStateException("Cannot resolve the classpath root of " + getClass().getName(), e);
        }
    }
}

-------------------------------------------------------------------------------- /es52X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.openjdk.jmh.results.RunResult;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;

import java.nio.file.Paths;
import java.util.Collection;

/**
 * Runs the JMH benchmarks matching {@code .*Benchmark} from a JUnit test and checks the
 * score of the {@code encodeStringFrenchPhonetic} benchmark against a fixed floor.
 */
public class BenchmarkIT {

    /**
     * Launches the benchmarks, writes the JSON report into the folder returned by
     * {@link #resolveTargetFolder()} and asserts a score above 6000 for every result
     * labelled {@code encodeStringFrenchPhonetic}. If no result carries that label,
     * nothing is asserted.
     *
     * @throws RunnerException if JMH fails to run the benchmarks
     */
    @Test
    public void launchBenchmark() throws RunnerException {

        String targetFolder = resolveTargetFolder();

        Options opt = new OptionsBuilder()
                .include(".*Benchmark")
                .warmupTime(TimeValue.seconds(1))
                .warmupIterations(5)
                .measurementTime(TimeValue.milliseconds(100))
                .measurementIterations(100)
                .threads(4)
                .forks(1)
                .shouldFailOnError(true)
                .shouldDoGC(true)
                .jvmArgs("-server")
                .resultFormat(ResultFormatType.JSON)
                .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json")
                .build();

        Collection<RunResult> run = new Runner(opt).run();
        run.stream()
                .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel()))
                .forEach(
                        runResult ->
                                Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000)
                );
    }

    /**
     * Returns the parent directory of the classpath root this class was loaded from.
     * The URL is converted through a URI because {@code URL#getFile()} keeps percent-escapes
     * (for example {@code %20}) and, on Windows, a leading {@code /C:} that {@code Paths.get}
     * rejects.
     */
    private String resolveTargetFolder() {
        try {
            return Paths.get(this.getClass().getResource("/").toURI()).getParent().toString();
        } catch (java.net.URISyntaxException e) {
            throw new IllegalStateException("Cannot resolve the classpath root of " + getClass().getName(), e);
        }
    }
}

-------------------------------------------------------------------------------- /es54X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.openjdk.jmh.results.RunResult;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;

import java.nio.file.Paths;
import java.util.Collection;

/**
 * Runs the JMH benchmarks matching {@code .*Benchmark} from a JUnit test and checks the
 * score of the {@code encodeStringFrenchPhonetic} benchmark against a fixed floor.
 */
public class BenchmarkIT {

    /**
     * Launches the benchmarks, writes the JSON report into the folder returned by
     * {@link #resolveTargetFolder()} and asserts a score above 6000 for every result
     * labelled {@code encodeStringFrenchPhonetic}. If no result carries that label,
     * nothing is asserted.
     *
     * @throws RunnerException if JMH fails to run the benchmarks
     */
    @Test
    public void launchBenchmark() throws RunnerException {

        String targetFolder = resolveTargetFolder();

        Options opt = new OptionsBuilder()
                .include(".*Benchmark")
                .warmupTime(TimeValue.seconds(1))
                .warmupIterations(5)
                .measurementTime(TimeValue.milliseconds(100))
                .measurementIterations(100)
                .threads(4)
                .forks(1)
                .shouldFailOnError(true)
                .shouldDoGC(true)
                .jvmArgs("-server")
                .resultFormat(ResultFormatType.JSON)
                .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json")
                .build();

        Collection<RunResult> run = new Runner(opt).run();
        run.stream()
                .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel()))
                .forEach(
                        runResult ->
                                Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000)
                );
    }

    /**
     * Returns the parent directory of the classpath root this class was loaded from.
     * The URL is converted through a URI because {@code URL#getFile()} keeps percent-escapes
     * (for example {@code %20}) and, on Windows, a leading {@code /C:} that {@code Paths.get}
     * rejects.
     */
    private String resolveTargetFolder() {
        try {
            return Paths.get(this.getClass().getResource("/").toURI()).getParent().toString();
        } catch (java.net.URISyntaxException e) {
            throw new IllegalStateException("Cannot resolve the classpath root of " + getClass().getName(), e);
        }
    }
}

-------------------------------------------------------------------------------- /es55X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: --------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.openjdk.jmh.results.RunResult;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;

import java.nio.file.Paths;
import java.util.Collection;

/**
 * Runs the JMH benchmarks matching {@code .*Benchmark} from a JUnit test and checks the
 * score of the {@code encodeStringFrenchPhonetic} benchmark against a fixed floor.
 */
public class BenchmarkIT {

    /**
     * Launches the benchmarks, writes the JSON report into the folder returned by
     * {@link #resolveTargetFolder()} and asserts a score above 6000 for every result
     * labelled {@code encodeStringFrenchPhonetic}. If no result carries that label,
     * nothing is asserted.
     *
     * @throws RunnerException if JMH fails to run the benchmarks
     */
    @Test
    public void launchBenchmark() throws RunnerException {

        String targetFolder = resolveTargetFolder();

        Options opt = new OptionsBuilder()
                .include(".*Benchmark")
                .warmupTime(TimeValue.seconds(1))
                .warmupIterations(5)
                .measurementTime(TimeValue.milliseconds(100))
                .measurementIterations(100)
                .threads(4)
                .forks(1)
                .shouldFailOnError(true)
                .shouldDoGC(true)
                .jvmArgs("-server")
                .resultFormat(ResultFormatType.JSON)
                .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json")
                .build();

        Collection<RunResult> run = new Runner(opt).run();
        run.stream()
                .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel()))
                .forEach(
                        runResult ->
                                Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000)
                );
    }

    /**
     * Returns the parent directory of the classpath root this class was loaded from.
     * The URL is converted through a URI because {@code URL#getFile()} keeps percent-escapes
     * (for example {@code %20}) and, on Windows, a leading {@code /C:} that {@code Paths.get}
     * rejects.
     */
    private String resolveTargetFolder() {
        try {
            return Paths.get(this.getClass().getResource("/").toURI()).getParent().toString();
        } catch (java.net.URISyntaxException e) {
            throw new IllegalStateException("Cannot resolve the classpath root of " + getClass().getName(), e);
        }
    }
}
--------------------------------------------------------------------------------
/es56X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java:
--------------------------------------------------------------------------------
package com.galerieslafayette.index.analysis;

import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.openjdk.jmh.results.RunResult;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;

import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Integration test that launches the JMH benchmarks of this module and fails
 * when the French phonetic encoding benchmark scores at or below a fixed floor.
 */
public class BenchmarkIT {

    /** Label of the primary JMH result that is checked against {@link #MIN_SCORE}. */
    private static final String BENCHMARK_LABEL = "encodeStringFrenchPhonetic";

    /**
     * Lowest acceptable primary score (exclusive).
     * NOTE(review): the unit depends on the benchmark mode declared in
     * FrenchPhoneticBenchmark, which is not visible here - confirm.
     */
    private static final double MIN_SCORE = 6000;

    /**
     * Runs every benchmark whose name matches {@code .*Benchmark} in one forked
     * JVM, writes the results as JSON into the parent directory of the test
     * classpath root, and asserts the score of the phonetic encoding benchmark.
     *
     * @throws RunnerException    if JMH fails to run the benchmarks
     * @throws URISyntaxException if the classpath root URL is not a valid URI
     */
    @Test
    public void launchBenchmark() throws RunnerException, URISyntaxException {

        // Go through a URI rather than URL.getFile(): getFile() is not
        // percent-decoded (breaks on paths containing spaces) and yields
        // "/C:/..." on Windows, which Paths.get(String) rejects.
        Path targetFolder = Paths.get(getClass().getResource("/").toURI()).getParent();
        String resultFile = targetFolder + "/" + FrenchPhoneticBenchmark.class.getName() + ".jmh.json";

        Options opt = new OptionsBuilder()
                .include(".*Benchmark")
                .warmupTime(TimeValue.seconds(1))
                .warmupIterations(5)
                .measurementTime(TimeValue.milliseconds(100))
                .measurementIterations(100)
                .threads(4)
                .forks(1)
                .shouldFailOnError(true)
                .shouldDoGC(true)
                .jvmArgs("-server")
                .resultFormat(ResultFormatType.JSON)
                .result(resultFile)
                .build();

        Collection<RunResult> results = new Runner(opt).run();

        List<RunResult> phoneticResults = results.stream()
                .filter(result -> BENCHMARK_LABEL.equals(result.getPrimaryResult().getLabel()))
                .collect(Collectors.toList());

        // Without this guard the test would pass silently if the benchmark were
        // renamed or excluded, because there would be nothing left to assert on.
        Assertions.assertThat(phoneticResults)
                .as("results of benchmark " + BENCHMARK_LABEL)
                .isNotEmpty();

        phoneticResults.forEach(result ->
                Assertions.assertThat(result.getPrimaryResult().getScore())
                        .as("score of benchmark " + BENCHMARK_LABEL)
                        .isGreaterThan(MIN_SCORE));
    }
}

--------------------------------------------------------------------------------
/es60X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java:
-------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.junit.Test; 5 | import org.openjdk.jmh.results.RunResult; 6 | import org.openjdk.jmh.results.format.ResultFormatType; 7 | import org.openjdk.jmh.runner.Runner; 8 | import org.openjdk.jmh.runner.RunnerException; 9 | import org.openjdk.jmh.runner.options.Options; 10 | import org.openjdk.jmh.runner.options.OptionsBuilder; 11 | import org.openjdk.jmh.runner.options.TimeValue; 12 | 13 | import java.nio.file.Paths; 14 | import java.util.Collection; 15 | 16 | public class BenchmarkIT { 17 | 18 | @Test 19 | public void launchBenchmark() throws RunnerException { 20 | 21 | String targetFolder = Paths.get( 22 | this.getClass().getResource("/").getFile()).getParent().toString(); 23 | 24 | Options opt = new OptionsBuilder() 25 | .include(".*Benchmark") 26 | .warmupTime(TimeValue.seconds(1)) 27 | .warmupIterations(5) 28 | .measurementTime(TimeValue.milliseconds(100)) 29 | .measurementIterations(100) 30 | .threads(4) 31 | .forks(1) 32 | .shouldFailOnError(true) 33 | .shouldDoGC(true) 34 | .jvmArgs("-server") 35 | .resultFormat(ResultFormatType.JSON) 36 | .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json") 37 | .build(); 38 | 39 | Collection run = new Runner(opt).run(); 40 | run.stream() 41 | .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel())) 42 | .forEach( 43 | runResult -> 44 | Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000) 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /es61X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import 
org.assertj.core.api.Assertions; 4 | import org.junit.Test; 5 | import org.openjdk.jmh.results.RunResult; 6 | import org.openjdk.jmh.results.format.ResultFormatType; 7 | import org.openjdk.jmh.runner.Runner; 8 | import org.openjdk.jmh.runner.RunnerException; 9 | import org.openjdk.jmh.runner.options.Options; 10 | import org.openjdk.jmh.runner.options.OptionsBuilder; 11 | import org.openjdk.jmh.runner.options.TimeValue; 12 | 13 | import java.nio.file.Paths; 14 | import java.util.Collection; 15 | 16 | public class BenchmarkIT { 17 | 18 | @Test 19 | public void launchBenchmark() throws RunnerException { 20 | 21 | String targetFolder = Paths.get( 22 | this.getClass().getResource("/").getFile()).getParent().toString(); 23 | 24 | Options opt = new OptionsBuilder() 25 | .include(".*Benchmark") 26 | .warmupTime(TimeValue.seconds(1)) 27 | .warmupIterations(5) 28 | .measurementTime(TimeValue.milliseconds(100)) 29 | .measurementIterations(100) 30 | .threads(4) 31 | .forks(1) 32 | .shouldFailOnError(true) 33 | .shouldDoGC(true) 34 | .jvmArgs("-server") 35 | .resultFormat(ResultFormatType.JSON) 36 | .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json") 37 | .build(); 38 | 39 | Collection run = new Runner(opt).run(); 40 | run.stream() 41 | .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel())) 42 | .forEach( 43 | runResult -> 44 | Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000) 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /es62X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.junit.Test; 5 | import org.openjdk.jmh.results.RunResult; 6 | import org.openjdk.jmh.results.format.ResultFormatType; 7 | import 
org.openjdk.jmh.runner.Runner; 8 | import org.openjdk.jmh.runner.RunnerException; 9 | import org.openjdk.jmh.runner.options.Options; 10 | import org.openjdk.jmh.runner.options.OptionsBuilder; 11 | import org.openjdk.jmh.runner.options.TimeValue; 12 | 13 | import java.nio.file.Paths; 14 | import java.util.Collection; 15 | 16 | public class BenchmarkIT { 17 | 18 | @Test 19 | public void launchBenchmark() throws RunnerException { 20 | 21 | String targetFolder = Paths.get( 22 | this.getClass().getResource("/").getFile()).getParent().toString(); 23 | 24 | Options opt = new OptionsBuilder() 25 | .include(".*Benchmark") 26 | .warmupTime(TimeValue.seconds(1)) 27 | .warmupIterations(5) 28 | .measurementTime(TimeValue.milliseconds(100)) 29 | .measurementIterations(100) 30 | .threads(4) 31 | .forks(1) 32 | .shouldFailOnError(true) 33 | .shouldDoGC(true) 34 | .jvmArgs("-server") 35 | .resultFormat(ResultFormatType.JSON) 36 | .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json") 37 | .build(); 38 | 39 | Collection run = new Runner(opt).run(); 40 | run.stream() 41 | .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel())) 42 | .forEach( 43 | runResult -> 44 | Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000) 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /es63X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.junit.Test; 5 | import org.openjdk.jmh.results.RunResult; 6 | import org.openjdk.jmh.results.format.ResultFormatType; 7 | import org.openjdk.jmh.runner.Runner; 8 | import org.openjdk.jmh.runner.RunnerException; 9 | import org.openjdk.jmh.runner.options.Options; 10 | import 
org.openjdk.jmh.runner.options.OptionsBuilder; 11 | import org.openjdk.jmh.runner.options.TimeValue; 12 | 13 | import java.nio.file.Paths; 14 | import java.util.Collection; 15 | 16 | public class BenchmarkIT { 17 | 18 | @Test 19 | public void launchBenchmark() throws RunnerException { 20 | 21 | String targetFolder = Paths.get( 22 | this.getClass().getResource("/").getFile()).getParent().toString(); 23 | 24 | Options opt = new OptionsBuilder() 25 | .include(".*Benchmark") 26 | .warmupTime(TimeValue.seconds(1)) 27 | .warmupIterations(5) 28 | .measurementTime(TimeValue.milliseconds(100)) 29 | .measurementIterations(100) 30 | .threads(4) 31 | .forks(1) 32 | .shouldFailOnError(true) 33 | .shouldDoGC(true) 34 | .jvmArgs("-server") 35 | .resultFormat(ResultFormatType.JSON) 36 | .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json") 37 | .build(); 38 | 39 | Collection run = new Runner(opt).run(); 40 | run.stream() 41 | .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel())) 42 | .forEach( 43 | runResult -> 44 | Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000) 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /es64X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.junit.Test; 5 | import org.openjdk.jmh.results.RunResult; 6 | import org.openjdk.jmh.results.format.ResultFormatType; 7 | import org.openjdk.jmh.runner.Runner; 8 | import org.openjdk.jmh.runner.RunnerException; 9 | import org.openjdk.jmh.runner.options.Options; 10 | import org.openjdk.jmh.runner.options.OptionsBuilder; 11 | import org.openjdk.jmh.runner.options.TimeValue; 12 | 13 | import java.nio.file.Paths; 14 | import java.util.Collection; 15 | 
16 | public class BenchmarkIT { 17 | 18 | @Test 19 | public void launchBenchmark() throws RunnerException { 20 | 21 | String targetFolder = Paths.get( 22 | this.getClass().getResource("/").getFile()).getParent().toString(); 23 | 24 | Options opt = new OptionsBuilder() 25 | .include(".*Benchmark") 26 | .warmupTime(TimeValue.seconds(1)) 27 | .warmupIterations(5) 28 | .measurementTime(TimeValue.milliseconds(100)) 29 | .measurementIterations(100) 30 | .threads(4) 31 | .forks(1) 32 | .shouldFailOnError(true) 33 | .shouldDoGC(true) 34 | .jvmArgs("-server") 35 | .resultFormat(ResultFormatType.JSON) 36 | .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json") 37 | .build(); 38 | 39 | Collection run = new Runner(opt).run(); 40 | run.stream() 41 | .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel())) 42 | .forEach( 43 | runResult -> 44 | Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000) 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /es65X/src/test/java/com/galerieslafayette/index/analysis/BenchmarkIT.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.junit.Test; 5 | import org.openjdk.jmh.results.RunResult; 6 | import org.openjdk.jmh.results.format.ResultFormatType; 7 | import org.openjdk.jmh.runner.Runner; 8 | import org.openjdk.jmh.runner.RunnerException; 9 | import org.openjdk.jmh.runner.options.Options; 10 | import org.openjdk.jmh.runner.options.OptionsBuilder; 11 | import org.openjdk.jmh.runner.options.TimeValue; 12 | 13 | import java.nio.file.Paths; 14 | import java.util.Collection; 15 | 16 | public class BenchmarkIT { 17 | 18 | @Test 19 | public void launchBenchmark() throws RunnerException { 20 | 21 | String targetFolder = Paths.get( 22 | 
this.getClass().getResource("/").getFile()).getParent().toString(); 23 | 24 | Options opt = new OptionsBuilder() 25 | .include(".*Benchmark") 26 | .warmupTime(TimeValue.seconds(1)) 27 | .warmupIterations(5) 28 | .measurementTime(TimeValue.milliseconds(100)) 29 | .measurementIterations(100) 30 | .threads(4) 31 | .forks(1) 32 | .shouldFailOnError(true) 33 | .shouldDoGC(true) 34 | .jvmArgs("-server") 35 | .resultFormat(ResultFormatType.JSON) 36 | .result(targetFolder+"/"+FrenchPhoneticBenchmark.class.getName() + ".jmh.json") 37 | .build(); 38 | 39 | Collection run = new Runner(opt).run(); 40 | run.stream() 41 | .filter(runResult -> "encodeStringFrenchPhonetic".equals(runResult.getPrimaryResult().getLabel())) 42 | .forEach( 43 | runResult -> 44 | Assertions.assertThat(runResult.getPrimaryResult().getScore()).isGreaterThan(6000) 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /analyzer-core/src/test/java/com/galerieslafayette/analyzer/EncoderTest.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.analyzer; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.junit.Test; 5 | 6 | public class EncoderTest { 7 | 8 | @Test 9 | public void testSubstringStartIndexGreaterThanLengthExpectedEmptyString() throws Exception { 10 | Encoder encoder = new Encoder(); 11 | String result = encoder.substring("E", 2, 4); 12 | Assertions.assertThat(result).isEqualTo(""); 13 | } 14 | @Test 15 | public void testSubstringStartIndexEqualsToLengthExpectedEmptyString() throws Exception { 16 | Encoder encoder = new Encoder(); 17 | String result = encoder.substring("E", 1, 4); 18 | Assertions.assertThat(result).isEqualTo(""); 19 | } 20 | 21 | @Test 22 | public void testSubstringStartIndexGreaterThanLengthExpectedNull() throws Exception { 23 | Encoder encoder = new Encoder(); 24 | String result = encoder.substring("E", 2, 4); 25 | 
Assertions.assertThat(result).isEmpty(); 26 | } 27 | 28 | @Test 29 | public void testSubstringStartIndexEqualsToLengthExpectedNull() throws Exception { 30 | Encoder encoder = new Encoder(); 31 | String result = encoder.substring("E", 1, 4); 32 | Assertions.assertThat(result).isEmpty(); 33 | } 34 | 35 | @Test 36 | public void testSubstringEndIndexEqualsToTailLength() throws Exception { 37 | Encoder encoder = new Encoder(); 38 | String result = encoder.substring("E", 1, 1); 39 | Assertions.assertThat(result).isEmpty(); 40 | } 41 | 42 | @Test 43 | public void testSubstringEndIndexGreaterToTailLength() throws Exception { 44 | Encoder encoder = new Encoder(); 45 | String result = encoder.substring("EE", 0, 2); 46 | Assertions.assertThat(result).isEqualTo("EE"); 47 | } 48 | 49 | @Test 50 | public void testSubstringEndIndexEqualsToStartIndex() throws Exception { 51 | Encoder encoder = new Encoder(); 52 | String result = encoder.substring("ERT", 2, 2); 53 | Assertions.assertThat(result).isEqualTo(""); 54 | } 55 | 56 | @Test 57 | public void testSubstringEndIndexLesserThanLength() throws Exception { 58 | Encoder encoder = new Encoder(); 59 | String result = encoder.substring("ERT", 1, 2); 60 | Assertions.assertThat(result).isEqualTo("R"); 61 | } 62 | 63 | @Test 64 | public void testSubstringEndIndexEqualsToLength() throws Exception { 65 | Encoder encoder = new Encoder(); 66 | String result = encoder.substring("ERT", 1, 3); 67 | Assertions.assertThat(result).isEqualTo("RT"); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /es60X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-6.0.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 7.0.1 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | 
org.elasticsearch 27 | elasticsearch 28 | 6.0.${es60X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /es61X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-6.1.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 7.1.0 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 6.1.${es61X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- 
/es62X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-6.2.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 7.2.1 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 6.2.${es62X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /es63X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-6.3.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 7.3.1 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 6.3.${es63X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | 
maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /es64X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-6.4.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 7.4.0 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 6.4.${es64X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /es51X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-5.1.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 6.3.0 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | 
analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 5.1.${es51X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /es52X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-5.2.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 6.4.1 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 5.2.${es52X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 
-------------------------------------------------------------------------------- /es53X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-5.3.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 6.4.2 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 5.3.${es53X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /es54X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-5.4.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 6.5.1 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 5.4.${es54X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | 
*.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /es55X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-5.5.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 6.6.0 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 5.5.${es55X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /es56X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-5.6.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 
15 | 16 | 6.6.1 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 5.6.${es56X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /es65X/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | french-phonetic-analyzer 7 | com.galerieslafayette.analyzer 8 | 2.0.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-6.5.X 13 | French phonetic analyzer that encodes string as pronounced 14 | 15 | 16 | 7.5.0 17 | 18 | 19 | 20 | 21 | com.galerieslafayette.analyzer 22 | analyzer-core 23 | 24 | 25 | 26 | org.elasticsearch 27 | elasticsearch 28 | 6.5.${es65X.version} 29 | 30 | 31 | 32 | org.apache.lucene 33 | lucene-analyzers-phonetic 34 | ${lucene.version} 35 | 36 | 37 | 38 | 39 | french-phonetic-analyzer.${project.artifactId}.${project.version} 40 | 41 | 42 | src/main/resources 43 | false 44 | 45 | *.properties 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.6.1 54 | 55 | 1.8 56 | 1.8 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | false 64 | 65 | src/assembly/plugin.xml 66 | 67 | french-phonetic-analyzer.${project.artifactId}.${project.version} 68 | 69 | 70 | 71 | package 72 | 73 | single 74 | 75 | 
76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # French-phonetic-analyser plugin (token filter) 2 | 3 | ## Que fait ce plugin / token filter? 4 | 5 | Ce token filter permet d'encoder le texte à partir des phonèmes prononcés dans la langue française. 6 | 7 | ## Pourquoi ce plugin plutôt que ceux qui existent déjà? 8 | 9 | Ce plugin a été créé car l'utilisation des plugins existants (soundex...) 10 | ramenait plus de résultats que ce qui était recherché. cf: https://blog.ippon.fr/2016/03/02/elasticsearch-tu-tentends-quand-tu-analyses/ 11 | 12 | ## Comment le plugin fonctionne-t-il? 13 | 14 | Le plugin fonctionne de la même façon qu'un enfant lirait un texte ou l'écrirait sans connaître l'orthographe. 15 | 16 | Le texte est décomposé de gauche à droite avec une lecture de quelques caractères qui suivent la lettre courante. 17 | 18 | La transcription de certains phonèmes est codée de la façon suivante: 19 | 20 | |Valeur encodée | Son à encoder | 21 | |-----------------|-----------------| 22 | | 1 | in | 23 | | 2 | é | 24 | | 3 | an | 25 | | 4 | on | 26 | | 5 | s | 27 | | 8 | oeu/eu | 28 | 29 | A voir Prononciation des graphèmes sur Wikipedia 30 | 31 | 32 | ## Comment obtenir la version du plugin pour la version de l'elasticsearch que j'utilise: 33 | 34 | Le plugin est décliné pour chaque version majeure et mineure d'elasticsearch. 35 | 36 | C'est lors de ces versions que les montées de versions de Lucene sont effectuées, ce plugin utilise aussi Lucene, il est donc versionné de cette façon 5.6.X. 37 | 38 | **ATTENTION**: Un plugin ne fonctionne que s'il a été buildé pour la version cible d'elasticsearch.
La version est inscrite dans le fichier plugin-descriptor.properties 39 | 40 | ### Compiler la version désirée: 41 | 42 | 43 | ```shell 44 | mvn clean install -Prun-its -DesYYX.version=Z 45 | ``` 46 | Remplacer YY par la version majeure et mineure d'elasticsearch et laisser le X tel quel. Remplacer le Z par la sous mineure désirée. 47 | 48 | Par défaut des tests de performances JMH sont lancés ainsi qu'un test du plugin généré sur l'elasticsearch correspondant. 49 | 50 | Si vous ne mettez pas le paramètre esYYX.version alors une version est prise par défaut et elle ne correspondra sûrement pas à la vôtre. Voir le pom du module correspondant pour la version par défaut. 51 | 52 | Le plugin se trouve ensuite dans le module maven pour la version d'elasticsearch désirée et dans ce module sous l'arborescence suivante: ***/target/xxxxx.zip*** 53 | 54 | ### Release note: 55 | 56 | | Version | Contenu | 57 | |-----------------|--------------------------------------------------------------------------------------------------------| 58 | | 1.0.0 | Le plugin encode phonétiquement. 1 token <=> 1 token encodé | 59 | | 2.0.0 | Le plugin encode phonétiquement de différentes manières un même token. 1 token <=> 1...X tokens encodés| 60 | | 2.0.0 | Version 6.5.X supportée. 
Packaging revu pour les versions plus récentes que 6.3.X | 61 | 62 | 63 | ## Contributeurs: 64 | Merci aux ***Galeries Lafayette*** d'avoir permis de rendre le code open-source et à ces différents contributeurs: 65 | 66 | * Harold Capitaine 67 | * Yves Mathieu Rideau Baudin 68 | * Alexandre Pocheau 69 | * Jonathan Baranzini 70 | -------------------------------------------------------------------------------- /es51X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | 
public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- 
/es52X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | 
stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es53X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import 
org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | 
return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es54X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static 
com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = 
input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es55X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public 
FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = 
ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es56X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean 
incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- 
/es60X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | 
stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es61X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import 
org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | 
return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es62X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static 
com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = 
input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es63X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public 
FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = 
ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es64X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean 
incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- 
/es65X/src/main/java/com/galerieslafayette/index/analysis/FrenchPhoneticAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import com.galerieslafayette.analyzer.Encoder; 4 | import org.apache.lucene.analysis.TokenFilter; 5 | import org.apache.lucene.analysis.TokenStream; 6 | import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; 7 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 8 | import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 9 | 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Locale; 15 | 16 | import static com.galerieslafayette.analyzer.Encoder.SOUND_2_ACCENTUATED_CHARS; 17 | 18 | public class FrenchPhoneticAnalyzer extends TokenFilter { 19 | 20 | private TokenStream input; 21 | private List stateTokens = new ArrayList<>(); 22 | private int currentIndex = 0; 23 | private CharTermAttribute termAtt; 24 | private PositionIncrementAttribute posIncrAtt; 25 | private State state; 26 | 27 | 28 | public FrenchPhoneticAnalyzer(TokenStream input) { 29 | super(input); 30 | this.input = input; 31 | termAtt = addAttribute(CharTermAttribute.class); 32 | posIncrAtt = addAttribute(PositionIncrementAttribute.class); 33 | } 34 | 35 | @Override 36 | public void reset() throws IOException { 37 | super.reset(); 38 | stateTokens.clear(); 39 | currentIndex = 0; 40 | state = null; 41 | } 42 | 43 | @Override 44 | public final boolean incrementToken() throws IOException { 45 | if (state == null) { 46 | currentIndex = 0; 47 | stateTokens.clear(); 48 | if(input.incrementToken()) { 49 | List encodedTokens = encode(termAtt.toString()); 50 | termAtt.setEmpty().append(encodedTokens.get(0)); 51 | posIncrAtt.setPositionIncrement(1); 52 | if (encodedTokens.size() > 1) { 53 | state = captureState(); 54 | 
stateTokens.addAll(encodedTokens); 55 | } 56 | currentIndex++; 57 | return true; 58 | } 59 | 60 | } else { 61 | if (stateTokens.size() > currentIndex) { 62 | restoreState(state); 63 | termAtt.setEmpty().append(stateTokens.get(currentIndex)); 64 | posIncrAtt.setPositionIncrement(0); 65 | currentIndex++; 66 | if(currentIndex>= stateTokens.size()){ 67 | state = null; 68 | } 69 | return true; 70 | } 71 | } 72 | 73 | return false; 74 | } 75 | 76 | @Override 77 | public void end() throws IOException { 78 | super.end(); 79 | state = null; 80 | currentIndex = 0; 81 | stateTokens.clear(); 82 | } 83 | 84 | public List encode(String input){ 85 | if (input == null || input.length() == 0) { 86 | return Arrays.asList(input); 87 | } 88 | int len = input.length(); 89 | String upperStr = input.toUpperCase(Locale.FRENCH); 90 | 91 | char[] chars = new char[len]; 92 | int count = 0; 93 | for (int i = 0; i < len; i++) { 94 | if (Character.isLetter(upperStr.charAt(i))) { 95 | if (SOUND_2_ACCENTUATED_CHARS.contains(upperStr.charAt(i))) { 96 | chars[count++] = '2'; 97 | } else { 98 | chars[count++] = upperStr.charAt(i); 99 | } 100 | } 101 | } 102 | char[] res = new char[count]; 103 | int finalSize = ASCIIFoldingFilter.foldToASCII(chars, 0, res, 0, count); 104 | String cleanedString = new String(chars, 0, finalSize); 105 | return new ArrayList<>(Encoder.operatePhonetic("", Encoder.charAt(cleanedString, 0), Encoder.substring(cleanedString, 1, cleanedString.length()))); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /es51X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import 
org.apache.commons.codec.language.bm.NameType; 7 | import org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | @Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = 
line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /es52X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import org.apache.commons.codec.language.bm.NameType; 7 | import org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import 
java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | @Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | 
@OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /es53X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import org.apache.commons.codec.language.bm.NameType; 7 | import org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | 
@Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws 
EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /es54X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import org.apache.commons.codec.language.bm.NameType; 7 | import org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | @Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class 
RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = 
TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /es55X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import org.apache.commons.codec.language.bm.NameType; 7 | import org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | @Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | 
public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | 
-------------------------------------------------------------------------------- /es56X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import org.apache.commons.codec.language.bm.NameType; 7 | import org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | @Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | 
instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /es60X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 
| import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import org.apache.commons.codec.language.bm.NameType; 7 | import org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | @Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = 
Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /es61X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import org.apache.commons.codec.language.bm.NameType; 7 | import 
org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | @Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash 
encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /es62X/src/test/java/com/galerieslafayette/index/analysis/FrenchPhoneticBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.galerieslafayette.index.analysis; 2 | 3 | import org.apache.commons.codec.EncoderException; 4 | import org.apache.commons.codec.language.RefinedSoundex; 5 | import org.apache.commons.codec.language.bm.BeiderMorseEncoder; 6 | import org.apache.commons.codec.language.bm.NameType; 7 | import org.apache.commons.codec.language.bm.RuleType; 8 | import org.openjdk.jmh.annotations.*; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | import java.nio.file.Files; 13 | import java.nio.file.Paths; 14 | import java.util.List; 15 | import java.util.concurrent.TimeUnit; 
16 | import java.util.stream.Stream; 17 | 18 | @State(Scope.Benchmark) 19 | public class FrenchPhoneticBenchmark { 20 | 21 | @State(Scope.Benchmark) 22 | public static class FrenchPhoneticFactory 23 | { 24 | FrenchPhoneticAnalyzer instance; 25 | 26 | @Setup(Level.Trial) 27 | public void initialize() 28 | { 29 | instance = new FrenchPhoneticAnalyzer(new FakeTokenStream()); 30 | } 31 | 32 | @TearDown(Level.Trial) 33 | public void shutdown() 34 | { 35 | // Nothing to do 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class RefinedSoundexFactory 41 | { 42 | RefinedSoundex instance; 43 | 44 | @Setup(Level.Trial) 45 | public void initialize() 46 | { 47 | instance = new RefinedSoundex(); 48 | } 49 | 50 | @TearDown(Level.Trial) 51 | public void shutdown() 52 | { 53 | // Nothing to do 54 | } 55 | } 56 | 57 | @State(Scope.Benchmark) 58 | public static class BeiderMorseFactory 59 | { 60 | BeiderMorseEncoder instance; 61 | 62 | @Setup(Level.Trial) 63 | public void initialize() 64 | { 65 | instance = new BeiderMorseEncoder(); 66 | instance.setNameType(NameType.GENERIC); 67 | instance.setConcat(true); 68 | instance.setRuleType(RuleType.APPROX); 69 | } 70 | 71 | @TearDown(Level.Trial) 72 | public void shutdown() 73 | { 74 | // Nothing to do 75 | } 76 | } 77 | 78 | protected String data; 79 | 80 | private String getWordFromDictionnary() throws URISyntaxException, IOException { 81 | Stream lines = Files.lines(Paths.get(FrenchPhoneticBenchmark.class.getResource("/fr-classique.dic").toURI())); 82 | return lines.findAny().map(line -> { 83 | int indexSlash = line.indexOf('/'); 84 | int indexSpace = line.indexOf(' '); 85 | if(indexSlash != -1 && indexSlash encodeStringFrenchPhonetic(FrenchPhoneticFactory frenchPhonetic) throws EncoderException { 124 | return frenchPhonetic.instance.encode(data); 125 | } 126 | 127 | @Benchmark 128 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 129 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 130 | @Fork(8) 131 
| @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 132 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 133 | @Threads(4) 134 | public String encodeStringRefinedSoundex(RefinedSoundexFactory refinedSoundex) throws EncoderException { 135 | return refinedSoundex.instance.encode(data); 136 | } 137 | 138 | @Benchmark 139 | @BenchmarkMode({Mode.Throughput/* Mode.AverageTime/*, Mode.SampleTime*/}) 140 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 141 | @Fork(8) 142 | @Measurement(iterations = 10, time= 1, timeUnit = TimeUnit.SECONDS) 143 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 144 | @Threads(4) 145 | public String encodeStringBeiderMorse(BeiderMorseFactory beiderMorse) throws EncoderException { 146 | return beiderMorse.instance.encode(data); 147 | } 148 | } 149 | --------------------------------------------------------------------------------