├── licenses
│   ├── duke-NOTICE.txt
│   ├── duke-1.2.jar.sha1
│   ├── mapdb-0.9.9.jar.sha1
│   ├── mapdb-NOTICE.txt
│   ├── duke-LICENSE.txt
│   └── mapdb-LICENSE.txt
├── .travis.yml
├── src
│   ├── test
│   │   ├── resources
│   │   │   ├── log4j.properties
│   │   │   └── rest-api-spec
│   │   │       └── test
│   │   │           └── nativescript
│   │   │               └── 10_test_loaded.yaml
│   │   └── java
│   │       └── org
│   │           └── yaba
│   │               └── entity
│   │                   ├── plugin
│   │                   │   └── EntityResolutionRestIT.java
│   │                   └── script
│   │                       ├── AbstractSearchScriptTestCase.java
│   │                       ├── NeutralFieldsTests.java
│   │                       ├── WeightedLevenshteinComparatorTests.java
│   │                       ├── EntityResolutionScriptScoreTests.java
│   │                       └── JaccardIndexComparatorTests.java
│   └── main
│       ├── assemblies
│       │   └── plugin.xml
│       └── java
│           └── org
│               └── yaba
│                   └── entity
│                       ├── plugin
│                       │   └── EntityResolutionPlugin.java
│                       ├── config
│                       │   └── EntityConfiguration.java
│                       └── script
│                           └── EntityResolutionScript.java
├── .gitignore
├── .gitattributes
├── dev-tools
│   └── src
│       └── main
│           └── resources
│               └── license-check
│                   ├── license-header-definition.xml
│                   └── entity-resolution-license-header.txt
├── README.md
├── pom.xml
└── LICENSE
/licenses/duke-NOTICE.txt:
--------------------------------------------------------------------------------
1 | https://github.com/larsga/duke
--------------------------------------------------------------------------------
/licenses/duke-1.2.jar.sha1:
--------------------------------------------------------------------------------
1 | c570d23c3f44423dd782cfe9143bcba1ea6f303b
--------------------------------------------------------------------------------
/licenses/mapdb-0.9.9.jar.sha1:
--------------------------------------------------------------------------------
1 | 3013576b4884b39321f7d4d5ac1a26e232d5f7b8
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | branches:
3 | except:
4 | - travis
5 | sudo: false
6 |
--------------------------------------------------------------------------------
/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, out
2 |
3 | log4j.appender.out=org.apache.log4j.ConsoleAppender
4 | log4j.appender.out.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Temp and test files ignores
2 | work/
3 | data/
4 | logs/
5 | build/
6 | target/
7 | .DS_Store
8 | *-execution-hints.log
9 |
10 | # IDEA ignores
11 | .idea/
12 | *.iml
13 |
14 | ## eclipse ignores
15 | .project
16 | .classpath
17 | .settings
18 | */.project
19 | */.classpath
20 | */.settings
21 | */eclipse-build
22 |
23 | ## netbeans ignores
24 | nb-configuration.xml
25 | nbactions.xml
26 |
--------------------------------------------------------------------------------
/src/test/resources/rest-api-spec/test/nativescript/10_test_loaded.yaml:
--------------------------------------------------------------------------------
1 | # Dummy integration tests
2 | #
3 | "entity-resolution loaded":
4 | - do:
5 | cluster.state: {}
6 |
7 | # Get master node id
8 | - set: { master_node: master }
9 |
10 | - do:
11 | nodes.info: {}
12 |
13 | - match: { nodes.$master.plugins.0.name: "elasticsearch-entity-resolution-plugin" }
14 |
15 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 | *.sln merge=union
7 | *.csproj merge=union
8 | *.vbproj merge=union
9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 |
12 | # Standard to msysgit
13 | *.doc diff=astextplain
14 | *.DOC diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot diff=astextplain
18 | *.DOT diff=astextplain
19 | *.pdf diff=astextplain
20 | *.PDF diff=astextplain
21 | *.rtf diff=astextplain
22 | *.RTF diff=astextplain
23 |
--------------------------------------------------------------------------------
/dev-tools/src/main/resources/license-check/license-header-definition.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | /*
5 | *
6 | */
7 |
8 | (\s|\t)*/\*.*$
9 | .*\*/(\s|\t)*$
10 | false
11 | true
12 |
13 |
14 |
--------------------------------------------------------------------------------
/dev-tools/src/main/resources/license-check/entity-resolution-license-header.txt:
--------------------------------------------------------------------------------
1 | Licensed under the Apache License, Version 2.0 (the "License");
2 | you may not use this file except in compliance with the License.
3 | You may obtain a copy of the License at
4 |
5 | http://www.apache.org/licenses/LICENSE-2.0
6 |
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
--------------------------------------------------------------------------------
/src/main/assemblies/plugin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | plugin
4 |
5 | zip
6 |
7 | false
8 |
9 |
10 | /
11 | true
12 | true
13 |
14 | org.elasticsearch:elasticsearch
15 |
16 |
17 |
18 |
19 |
20 | ${elasticsearch.tools.directory}/plugin-metadata/plugin-descriptor.properties
21 |
22 | true
23 |
24 |
25 |
--------------------------------------------------------------------------------
/src/main/java/org/yaba/entity/plugin/EntityResolutionPlugin.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.yaba.entity.plugin;
16 |
17 | import org.elasticsearch.plugins.Plugin;
18 | import org.elasticsearch.script.ScriptModule;
19 | import org.yaba.entity.script.EntityResolutionScript;
20 |
21 | /**
22 |  * Plugin class that reports a fixed name and description and registers
23 |  * {@link EntityResolutionScript.Factory} with the script module.
24 |  */
25 | public class EntityResolutionPlugin extends Plugin {
26 |
27 |     /** Value returned by {@link #name()}. */
28 |     private static final String PLUGIN_NAME = "entity-resolution-plugin";
29 |
30 |     /** Value returned by {@link #description()}. */
31 |     private static final String PLUGIN_DESCRIPTION = "Bayesian based entity resolution plugin";
32 |
33 |     /** Name under which the script factory is registered. */
34 |     private static final String SCRIPT_NAME = "entity-resolution";
35 |
36 |     /**
37 |      * @return the constant name of this plugin
38 |      */
39 |     @Override
40 |     public final String name() {
41 |         return PLUGIN_NAME;
42 |     }
43 |
44 |     /**
45 |      * @return the constant one-line description of this plugin
46 |      */
47 |     @Override
48 |     public final String description() {
49 |         return PLUGIN_DESCRIPTION;
50 |     }
51 |
52 |     /**
53 |      * Registers the entity resolution script factory on the given module.
54 |      *
55 |      * @param module script module that receives the registration
56 |      */
57 |     public void onModule(ScriptModule module) {
58 |         // This plugin defines a single script; register its factory by name.
59 |         module.registerScript(SCRIPT_NAME, EntityResolutionScript.Factory.class);
60 |     }
61 | }
--------------------------------------------------------------------------------
/src/test/java/org/yaba/entity/plugin/EntityResolutionRestIT.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.yaba.entity.plugin;
15 |
16 | import com.carrotsearch.randomizedtesting.annotations.Name;
17 | import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
18 | import org.elasticsearch.test.rest.ESRestTestCase;
19 | import org.elasticsearch.test.rest.RestTestCandidate;
20 | import org.elasticsearch.test.rest.parser.RestTestParseException;
21 |
22 | import java.io.IOException;
23 |
24 | public class EntityResolutionRestIT extends ESRestTestCase {
25 |
26 | public EntityResolutionRestIT(@Name("yaml") RestTestCandidate testCandidate) {
27 | super(testCandidate);
28 | }
29 |
30 | @ParametersFactory
31 | public static Iterable