├── .gitignore
├── LICENSE
├── README.md
├── adel-admin-server
├── pom.xml
└── src
│ └── main
│ ├── docker
│ └── docker-assembly.xml
│ ├── java
│ └── fr
│ │ └── eurecom
│ │ └── adel
│ │ └── admin
│ │ └── server
│ │ └── ADELAdminServer.java
│ └── resources
│ └── bootstrap.yaml
├── adel-api-gateway
├── pom.xml
└── src
│ └── main
│ ├── docker
│ └── docker-assembly.xml
│ ├── java
│ └── fr
│ │ └── eurecom
│ │ └── adel
│ │ └── api
│ │ └── gateway
│ │ ├── ADELAPIGateway.java
│ │ └── filters
│ │ └── LanguageFilter.java
│ └── resources
│ └── bootstrap.yaml
├── adel-api
├── pom.xml
└── src
│ └── main
│ ├── docker
│ └── docker-assembly.xml
│ ├── java
│ └── fr
│ │ └── eurecom
│ │ └── adel
│ │ └── api
│ │ ├── ADELAPIApplication.java
│ │ ├── configuration
│ │ ├── BasePathAwareRelativePathProvider.java
│ │ └── SwaggerDocumentationConfig.java
│ │ ├── controllers
│ │ ├── EntityRecognition.java
│ │ └── Utilities.java
│ │ ├── converters
│ │ └── DocumentConverter.java
│ │ └── formatter
│ │ ├── DocumentFormatter.java
│ │ ├── ProfileFormatter.java
│ │ └── RecognitionProfile.java
│ └── resources
│ └── bootstrap.yaml
├── adel-commons
├── pom.xml
└── src
│ └── main
│ ├── java
│ └── fr
│ │ └── eurecom
│ │ └── adel
│ │ └── commons
│ │ ├── datatypes
│ │ ├── Document.java
│ │ ├── Entity.java
│ │ ├── Sentence.java
│ │ ├── Token.java
│ │ └── TweetEntity.java
│ │ ├── exceptions
│ │ ├── CoNLLMalformedException.java
│ │ ├── NIFMalformedException.java
│ │ └── TACMalformedException.java
│ │ ├── formats
│ │ ├── CoNLL.java
│ │ ├── NIF.java
│ │ └── TAC.java
│ │ ├── utils
│ │ ├── RDFUtils.java
│ │ ├── ReflectionUtils.java
│ │ ├── ScoringUtils.java
│ │ ├── StringUtils.java
│ │ └── TweetUtils.java
│ │ └── validators
│ │ ├── AlreadyExists.java
│ │ ├── AlreadyExistsValidator.java
│ │ ├── File.java
│ │ ├── FileValidator.java
│ │ ├── MustExists.java
│ │ ├── MustExistsValidator.java
│ │ ├── Name.java
│ │ ├── OneOf.java
│ │ ├── OneOfValidator.java
│ │ ├── Readable.java
│ │ ├── ReadableValidator.java
│ │ ├── URL.java
│ │ ├── URLValidator.java
│ │ ├── Writable.java
│ │ └── WritableValidator.java
│ └── resources
│ ├── ValidationMessages.properties
│ ├── ValidationMessages_fr.properties
│ └── banner.txt
├── adel-config-server
├── pom.xml
└── src
│ └── main
│ ├── docker
│ └── docker-assembly.xml
│ ├── java
│ └── fr
│ │ └── eurecom
│ │ └── adel
│ │ └── config
│ │ └── server
│ │ └── ADELConfigServer.java
│ └── resources
│ └── bootstrap.yaml
├── adel-discovery-server
├── pom.xml
└── src
│ └── main
│ ├── docker
│ └── docker-assembly.xml
│ ├── java
│ └── fr
│ │ └── eurecom
│ │ └── adel
│ │ └── discovery
│ │ └── server
│ │ └── ADELDiscoveryServer.java
│ └── resources
│ └── bootstrap.yaml
├── adel-hystrix-dashboard
├── pom.xml
└── src
│ └── main
│ ├── docker
│ └── docker-assembly.xml
│ ├── java
│ └── fr
│ │ └── eurecom
│ │ └── adel
│ │ └── hystrix
│ │ └── dashboard
│ │ └── ADELHystrixDashboard.java
│ └── resources
│ └── bootstrap.yaml
├── adel-indexing
├── pom.xml
└── src
│ └── main
│ └── java
│ └── fr
│ └── eurecom
│ └── adel
│ └── indexing
│ └── App.java
├── adel-linking
├── pom.xml
└── src
│ └── main
│ └── java
│ └── fr
│ └── eurecom
│ └── adel
│ └── linking
│ └── App.java
├── adel-recognition
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── fr
│ │ │ └── eurecom
│ │ │ └── adel
│ │ │ └── recognition
│ │ │ ├── RecognitionSetup.java
│ │ │ ├── configuration
│ │ │ ├── AnnotatorConfig.java
│ │ │ ├── RecognitionConfig.java
│ │ │ ├── TweetNormalizationConfig.java
│ │ │ └── TypeOverlappingConfig.java
│ │ │ ├── domain
│ │ │ └── repositories
│ │ │ │ ├── AnnotatorRepository.java
│ │ │ │ ├── HashtagSegmentationRepository.java
│ │ │ │ ├── MentionOverlapResolutionRepository.java
│ │ │ │ ├── TypeOverlapResolutionRepository.java
│ │ │ │ └── UserMentionDereferencingRepository.java
│ │ │ ├── exceptions
│ │ │ ├── MappingNotExistsException.java
│ │ │ └── TypeNotExistsException.java
│ │ │ ├── implementation
│ │ │ └── repositories
│ │ │ │ ├── annotator
│ │ │ │ ├── jsonapi
│ │ │ │ │ └── JSONAPIAnnotatorRepository.java
│ │ │ │ └── stanfordcorenlp
│ │ │ │ │ └── StanfordCoreNLPAnnotatorRepository.java
│ │ │ │ ├── hashtagsegmentation
│ │ │ │ └── dictionarybased
│ │ │ │ │ └── DictionaryBasedHashtagSegmentation.java
│ │ │ │ ├── mentionoverlapresolution
│ │ │ │ └── merge
│ │ │ │ │ └── MergeMentionOverlapResolution.java
│ │ │ │ ├── typeoverlapresolution
│ │ │ │ └── majorityvoting
│ │ │ │ │ └── MajorityVotingTypeOverlapResolution.java
│ │ │ │ └── usermentiondereferencing
│ │ │ │ └── httpquery
│ │ │ │ └── HTTPQueryUserMentionDereferencing.java
│ │ │ ├── usecases
│ │ │ ├── Annotator.java
│ │ │ ├── OverlapResolution.java
│ │ │ ├── RecognitionPipeline.java
│ │ │ └── TweetNormalization.java
│ │ │ └── validators
│ │ │ ├── ContentPriorityList.java
│ │ │ ├── ContentPriorityListValidator.java
│ │ │ ├── HasTokenizer.java
│ │ │ ├── HasTokenizerValidator.java
│ │ │ ├── NameExistsForRecognition.java
│ │ │ ├── NameExistsForRecognitionValidator.java
│ │ │ ├── SizePriorityList.java
│ │ │ ├── SizePriorityListValidator.java
│ │ │ ├── UniqueName.java
│ │ │ └── UniqueNameValidator.java
│ └── resources
│ │ ├── dictionaries
│ │ ├── en_bigrams.tsv
│ │ └── en_unigrams.tsv
│ │ ├── mappings
│ │ ├── CoNLL2DBpedia.map
│ │ ├── CoNLL2DUL.map
│ │ ├── CoNLL2MUC.map
│ │ ├── CoNLL2Musicbrainz.map
│ │ ├── CoNLL2NEEL.map
│ │ ├── DBpedia2CoNLL.map
│ │ ├── DBpedia2DUL.map
│ │ ├── DBpedia2NEEL.map
│ │ └── MUC2CoNLL.map
│ │ └── stanford-full-en.properties
│ └── test
│ └── java
│ └── fr
│ └── eurecom
│ └── adel
│ └── recognition
│ └── implementation
│ └── repositories
│ ├── mentionoverlapresolution
│ └── merge
│ │ └── MergeMentionOverlapResolutionTest.java
│ └── typeoverlapresolution
│ └── majorityvoting
│ └── MajorityVotingTypeOverlapResolutionTest.java
├── adel-shell
├── pom.xml
└── src
│ └── main
│ ├── java
│ └── fr
│ │ └── eurecom
│ │ └── adel
│ │ └── shell
│ │ ├── ADELShellApplication.java
│ │ ├── command
│ │ └── ADELCommands.java
│ │ └── setting
│ │ └── SimplePromptProvider.java
│ └── resources
│ └── bootstrap.yaml
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### JetBrains template
3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
5 |
6 | # Intellij folder
7 | .idea/
8 |
9 | # Intellij project file
10 | *.iml
11 |
12 | # CMake
13 | cmake-build-debug/
14 | cmake-build-release/
15 |
16 | # File-based project format
17 | *.iws
18 |
19 | # IntelliJ
20 | out/
21 |
22 | # Maven ignores
23 | dependency-reduced-pom.xml
24 |
25 | # mpeltonen/sbt-idea plugin
26 | .idea_modules/
27 |
28 | # JIRA plugin
29 | atlassian-ide-plugin.xml
30 |
31 | # Crashlytics plugin (for Android Studio and IntelliJ)
32 | com_crashlytics_export_strings.xml
33 | crashlytics.properties
34 | crashlytics-build.properties
35 | fabric.properties
36 |
37 | ### Java template
38 | # Compiled class file
39 | *.class
40 |
41 | # Log file
42 | logs/
43 | *.log
44 |
45 | # BlueJ files
46 | *.ctxt
47 |
48 | # Mobile Tools for Java (J2ME)
49 | .mtj.tmp/
50 |
51 | # Package Files #
52 | *.war
53 | *.nar
54 | *.ear
55 | *.zip
56 | *.tar.gz
57 | *.rar
58 | */target/
59 |
60 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
61 | hs_err_pid*
62 |
63 | ### Data files
64 | *.conll
65 | *.tac
66 | *.nif
67 | *.txt
68 |
69 | ### Exception: keep the ADEL banner even though *.txt files are ignored above
70 | !adel-commons/src/main/resources/banner.txt
71 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2019 Julien Plu
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/adel-admin-server/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 | adel-admin-server
7 | ${revision}
8 | adel-admin-server
9 | http://adel.eurecom.fr
10 |
11 |
12 | adel-pom
13 | fr.eurecom.adel
14 | ${revision}
15 |
16 |
17 |
18 |
19 | org.springframework.boot
20 | spring-boot-starter
21 |
22 |
23 |
24 | org.springframework.cloud
25 | spring-cloud-starter-config
26 | ${spring.cloud.starter.config.version}
27 |
28 |
29 |
30 | org.springframework.cloud
31 | spring-cloud-starter-netflix-eureka-client
32 | ${spring.cloud.starter.netflix.eureka.client.version}
33 |
34 |
35 |
36 | de.codecentric
37 | spring-boot-admin-starter-server
38 | ${spring.boot.admin.starter.server.version}
39 |
40 |
41 |
42 | de.codecentric
43 | spring-boot-admin-server-ui
44 | ${spring.boot.admin.server.ui.version}
45 |
46 |
47 |
48 | org.springframework.boot
49 | spring-boot-starter-aop
50 |
51 |
52 |
53 | org.springframework.retry
54 | spring-retry
55 | ${spring.retry.version}
56 |
57 |
58 |
59 |
60 |
61 |
62 | org.springframework.boot
63 | spring-boot-maven-plugin
64 |
65 |
66 |
67 | io.fabric8
68 | docker-maven-plugin
69 | ${docker.maven.plugin.version}
70 |
71 |
72 |
73 | ${project.artifactId}
74 | jplu/${project.artifactId}:${revision}
75 |
76 | openjdk:oracle
77 | Julien Plu
78 |
79 | docker-assembly.xml
80 |
81 | /maven
82 |
83 | java -jar ${project.build.finalName}.jar
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 | org.apache.maven.plugins
92 | maven-compiler-plugin
93 |
94 | 13
95 | 13
96 |
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/adel-admin-server/src/main/docker/docker-assembly.xml:
--------------------------------------------------------------------------------
1 |
3 | ${project.artifactId}
4 |
5 |
6 | target${file.separator}${project.build.finalName}.jar
7 | ${file.separator}
8 |
9 |
10 |
--------------------------------------------------------------------------------
/adel-admin-server/src/main/java/fr/eurecom/adel/admin/server/ADELAdminServer.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.admin.server;
2 |
3 | import de.codecentric.boot.admin.server.config.EnableAdminServer;
4 |
5 | import org.springframework.boot.SpringApplication;
6 | import org.springframework.boot.autoconfigure.SpringBootApplication;
7 | import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
8 |
9 | /**
10 | * @author Julien Plu on 2019-03-26.
11 | */
12 | @SpringBootApplication
13 | @EnableAdminServer
14 | @EnableDiscoveryClient
15 | public class ADELAdminServer {
16 | public static void main(final String... args) {
17 | SpringApplication.run(ADELAdminServer.class, args);
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/adel-admin-server/src/main/resources/bootstrap.yaml:
--------------------------------------------------------------------------------
1 | spring:
2 | cloud:
3 | config:
4 | uri: ${CONFIG_URI:http://localhost:8888}
5 | failFast: true
6 | retry:
7 | initialInterval: 3000
8 | multiplier: 1.3
9 | maxInterval: 5000
10 | maxAttempts: 20
11 | application:
12 | name: admin-server
13 | eureka:
14 | client:
15 | serviceUrl:
16 | defaultZone: ${EUREKA_URI:http://localhost:8761/eureka}
--------------------------------------------------------------------------------
/adel-api-gateway/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 | adel-api-gateway
7 | ${revision}
8 | adel-api-gateway
9 | http://adel.eurecom.fr
10 |
11 |
12 | fr.eurecom.adel
13 | adel-pom
14 | ${revision}
15 |
16 |
17 |
18 |
19 | org.springframework.cloud
20 | spring-cloud-starter-netflix-eureka-client
21 | ${spring.cloud.starter.netflix.eureka.client.version}
22 |
23 |
24 |
25 | org.springframework.cloud
26 | spring-cloud-starter-netflix-zuul
27 | ${spring.cloud.starter.netflix.zuul.version}
28 |
29 |
30 |
31 | org.springframework.cloud
32 | spring-cloud-starter-config
33 | ${spring.cloud.starter.config.version}
34 |
35 |
36 |
37 | org.springframework.boot
38 | spring-boot-starter-aop
39 |
40 |
41 |
42 | org.springframework.retry
43 | spring-retry
44 | ${spring.retry.version}
45 |
46 |
47 |
48 | com.github.pemistahl
49 | lingua
50 | ${lingua.version}
51 |
52 |
53 |
54 | org.json
55 | json
56 | ${json.version}
57 |
58 |
59 |
60 |
61 |
62 |
63 | org.springframework.boot
64 | spring-boot-maven-plugin
65 |
66 |
67 |
68 | io.fabric8
69 | docker-maven-plugin
70 | ${docker.maven.plugin.version}
71 |
72 |
73 |
74 | ${project.artifactId}
75 | jplu/${project.artifactId}:${revision}
76 |
77 | openjdk:oracle
78 | Julien Plu
79 |
80 | docker-assembly.xml
81 |
82 | /maven
83 |
84 | java -jar ${project.build.finalName}.jar
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 | org.apache.maven.plugins
93 | maven-compiler-plugin
94 |
95 | 13
96 | 13
97 |
98 |
99 |
100 |
101 |
--------------------------------------------------------------------------------
/adel-api-gateway/src/main/docker/docker-assembly.xml:
--------------------------------------------------------------------------------
1 |
3 | ${project.artifactId}
4 |
5 |
6 | target${file.separator}${project.build.finalName}.jar
7 | ${file.separator}
8 |
9 |
10 |
--------------------------------------------------------------------------------
/adel-api-gateway/src/main/java/fr/eurecom/adel/api/gateway/ADELAPIGateway.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.gateway;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
6 | import org.springframework.cloud.netflix.zuul.EnableZuulProxy;
7 | import org.springframework.context.annotation.Bean;
8 |
9 | import fr.eurecom.adel.api.gateway.filters.LanguageFilter;
10 |
11 | /**
12 | * @author Julien Plu on 2019-03-27.
13 | */
14 | @EnableZuulProxy
15 | @EnableDiscoveryClient
16 | @SpringBootApplication
17 | public class ADELAPIGateway {
18 | public static void main(final String... args) {
19 | SpringApplication.run(ADELAPIGateway.class, args);
20 | }
21 |
22 | @Bean
23 | public LanguageFilter languageFilter() {
24 | return new LanguageFilter();
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/adel-api-gateway/src/main/java/fr/eurecom/adel/api/gateway/filters/LanguageFilter.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.gateway.filters;
2 |
3 | import com.github.pemistahl.lingua.api.IsoCode639_1;
4 | import com.netflix.zuul.ZuulFilter;
5 | import com.netflix.zuul.context.RequestContext;
6 | import com.netflix.zuul.exception.ZuulException;
7 | import com.github.pemistahl.lingua.api.LanguageDetectorBuilder;
8 | import com.github.pemistahl.lingua.api.LanguageDetector;
9 |
10 | import javax.servlet.http.HttpServletRequest;
11 |
12 | import org.json.JSONException;
13 | import org.json.JSONObject;
14 | import org.springframework.cloud.netflix.zuul.filters.support.FilterConstants;
15 | import org.springframework.web.util.WebUtils;
16 |
17 | import java.io.IOException;
18 | import java.util.stream.Collectors;
19 |
20 | /**
21 | * @author Julien Plu on 2019-04-01.
22 | */
23 | public class LanguageFilter extends ZuulFilter {
24 | private final LanguageDetector detector;
25 |
26 | public LanguageFilter() {
27 | this.detector = LanguageDetectorBuilder.fromIsoCodes639_1(
28 | IsoCode639_1.FR,
29 | IsoCode639_1.EN).build();
30 | }
31 |
32 | @Override
33 | public final String filterType() {
34 | return FilterConstants.PRE_TYPE;
35 | }
36 |
37 | @Override
38 | public final int filterOrder() {
39 | return FilterConstants.PRE_DECORATION_FILTER_ORDER - 1;
40 | }
41 |
42 | @Override
43 | public final boolean shouldFilter() {
44 | return true;
45 | }
46 |
47 | @Override
48 | public final Object run() throws ZuulException {
49 | final RequestContext ctx = RequestContext.getCurrentContext();
50 | final HttpServletRequest req = ctx.getRequest();
51 | String body = "";
52 |
53 | try {
54 | body = req.getReader().lines().collect(Collectors.joining());
55 | } catch (final IOException ex) {
56 | throw new ZuulException(ex, 500, ex.getMessage());
57 | }
58 |
59 | final JSONObject json = new JSONObject(body);
60 |
61 | String txt = "";
62 |
63 | try {
64 | txt = json.getString("text");
65 | } catch (final JSONException ex) {
66 | throw new ZuulException("The body of the request in not properly formed", 500, "The body" +
67 | "has to be a JSON with a \"text\" property");
68 | }
69 |
70 | final String language = this.detector.detectLanguageOf(txt).getIsoCode639_1().toString();
71 |
72 | req.setAttribute(WebUtils.INCLUDE_REQUEST_URI_ATTRIBUTE, req.getRequestURI().replace("adel",
73 | "adel-" + language));
74 |
75 | return null;
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/adel-api-gateway/src/main/resources/bootstrap.yaml:
--------------------------------------------------------------------------------
1 | spring:
2 | cloud:
3 | config:
4 | uri: ${CONFIG_URI:http://localhost:8888}
5 | failFast: true
6 | retry:
7 | initialInterval: 3000
8 | multiplier: 1.3
9 | maxInterval: 5000
10 | maxAttempts: 20
11 | application:
12 | name: api-gateway
13 | eureka:
14 | client:
15 | serviceUrl:
16 | defaultZone: ${EUREKA_URI:http://localhost:8761/eureka}
--------------------------------------------------------------------------------
/adel-api/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | adel-api
6 | ${revision}
7 | adel-api
8 | http://adel.eurecom.fr
9 |
10 |
11 | fr.eurecom.adel
12 | adel-pom
13 | ${revision}
14 |
15 |
16 |
17 | en
18 |
19 |
20 |
21 |
22 | org.springframework.boot
23 | spring-boot-starter-actuator
24 |
25 |
26 |
27 | org.springframework.boot
28 | spring-boot-starter-web
29 |
30 |
31 |
32 | org.springframework.cloud
33 | spring-cloud-starter-config
34 | ${spring.cloud.starter.config.version}
35 |
36 |
37 |
38 | org.springframework.cloud
39 | spring-cloud-starter-netflix-eureka-client
40 | ${spring.cloud.starter.netflix.eureka.client.version}
41 |
42 |
43 |
44 | org.springframework.boot
45 | spring-boot-starter-aop
46 |
47 |
48 |
49 | org.springframework.retry
50 | spring-retry
51 | ${spring.retry.version}
52 |
53 |
54 |
55 | org.zalando
56 | logbook-spring-boot-starter
57 | ${logbook.spring.boot.starter.version}
58 |
59 |
60 |
61 | fr.eurecom.adel
62 | adel-recognition
63 | ${revision}
64 |
65 |
66 |
67 | org.projectlombok
68 | lombok
69 | ${lombok.version}
70 | provided
71 |
72 |
73 |
74 | io.springfox
75 | springfox-swagger2
76 | ${springfox.swagger2.version}
77 |
78 |
79 |
80 | io.springfox
81 | springfox-swagger-ui
82 | ${springfox.swagger.ui.version}
83 |
84 |
85 |
86 | com.fasterxml.jackson.datatype
87 | jackson-datatype-jsr310
88 | ${jackson.datatype.jsr310.version}
89 |
90 |
91 |
92 | javax.xml.bind
93 | jaxb-api
94 | ${jaxb.api.version}
95 |
96 |
97 |
98 | com.sun.xml.bind
99 | jaxb-core
100 | ${jaxb.core.version}
101 |
102 |
103 |
104 | com.sun.xml.bind
105 | jaxb-impl
106 | ${jaxb.impl.version}
107 |
108 |
109 |
110 | javax.activation
111 | activation
112 | ${activation.version}
113 |
114 |
115 |
116 | javax.validation
117 | validation-api
118 | ${validation.api.version}
119 |
120 |
121 |
122 |
123 |
124 |
125 | org.springframework.boot
126 | spring-boot-maven-plugin
127 |
128 |
129 |
130 | io.fabric8
131 | docker-maven-plugin
132 | ${docker.maven.plugin.version}
133 |
134 |
135 |
136 | ${project.artifactId}
137 | jplu/${project.artifactId}:${profile}-${revision}
138 |
139 | openjdk:oracle
140 | Julien Plu
141 |
142 | docker-assembly.xml
143 |
144 | /maven
145 |
146 | yum install -y wget
147 | wget -O apm-agent.jar https://search.maven.org/remotecontent?filepath=co/elastic/apm/elastic-apm-agent/${elastic.apm.agent.version}/elastic-apm-agent-${elastic.apm.agent.version}.jar
148 |
149 |
150 | java -javaagent:/maven/apm-agent.jar -jar -Xmx4g ${project.build.finalName}.jar --spring.profiles.active=${profile}
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 | org.apache.maven.plugins
159 | maven-compiler-plugin
160 |
161 | 13
162 | 13
163 |
164 |
165 |
166 |
167 |
168 |
--------------------------------------------------------------------------------
/adel-api/src/main/docker/docker-assembly.xml:
--------------------------------------------------------------------------------
1 |
3 | ${project.artifactId}
4 |
5 |
6 | target${file.separator}${project.build.finalName}.jar
7 | ${file.separator}
8 |
9 |
10 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/ADELAPIApplication.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
6 | import org.springframework.context.annotation.Bean;
7 | import org.springframework.web.servlet.config.annotation.CorsRegistry;
8 | import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
9 |
10 | /**
11 | * @author Julien Plu on 2019-03-19.
12 | */
13 | @SpringBootApplication(scanBasePackages = "fr.eurecom.adel")
14 | @EnableDiscoveryClient
15 | public class ADELAPIApplication {
16 | public static void main(final String... args) {
17 | SpringApplication.run(ADELAPIApplication.class, args);
18 | }
19 |
20 | @Bean
21 | public WebMvcConfigurer webConfigurer() {
22 | return new WebMvcConfigurer() {
23 | @Override
24 | public void addCorsMappings(final CorsRegistry registry) {
25 | registry.addMapping("/**")
26 | .allowedOrigins("*")
27 | .allowedMethods("*")
28 | .allowedHeaders("Content-Type");
29 | }
30 | };
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/configuration/BasePathAwareRelativePathProvider.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.configuration;
2 |
3 | import org.springframework.web.util.UriComponentsBuilder;
4 |
5 | import javax.servlet.ServletContext;
6 |
7 | import springfox.documentation.spring.web.paths.Paths;
8 | import springfox.documentation.spring.web.paths.RelativePathProvider;
9 |
10 | /**
11 | * @author Julien Plu on 2019-03-19.
12 | */
13 | class BasePathAwareRelativePathProvider extends RelativePathProvider {
14 | private String basePath;
15 |
16 | public BasePathAwareRelativePathProvider(ServletContext servletContext, String basePath) {
17 | super(servletContext);
18 | this.basePath = basePath;
19 | }
20 |
21 | @Override
22 | protected String applicationPath() {
23 | return Paths.removeAdjacentForwardSlashes(UriComponentsBuilder.fromPath(super.applicationPath()).path(this.basePath).build().toString());
24 | }
25 |
26 | @Override
27 | public String getOperationPath(String operationPath) {
28 | UriComponentsBuilder uriComponentsBuilder = UriComponentsBuilder.fromPath("/");
29 | return Paths.removeAdjacentForwardSlashes(
30 | uriComponentsBuilder.path(operationPath.replaceFirst("^" + this.basePath, "")).build().toString());
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/configuration/SwaggerDocumentationConfig.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.configuration;
2 |
3 | import org.springframework.context.annotation.Bean;
4 | import org.springframework.beans.factory.annotation.Value;
5 | import org.springframework.context.annotation.Configuration;
6 |
7 | import java.sql.Date;
8 | import java.time.LocalDate;
9 | import java.time.OffsetDateTime;
10 |
11 | import javax.servlet.ServletContext;
12 |
13 | import springfox.documentation.builders.ApiInfoBuilder;
14 | import springfox.documentation.builders.RequestHandlerSelectors;
15 | import springfox.documentation.service.ApiInfo;
16 | import springfox.documentation.service.Contact;
17 | import springfox.documentation.service.Tag;
18 | import springfox.documentation.spi.DocumentationType;
19 | import springfox.documentation.spring.web.plugins.Docket;
20 | import springfox.documentation.swagger2.annotations.EnableSwagger2;
21 |
22 | /**
23 | * @author Julien Plu on 2019-03-19.
24 | */
25 | @Configuration
26 | @EnableSwagger2
27 | public class SwaggerDocumentationConfig {
28 | private ApiInfo apiInfo() {
29 | return new ApiInfoBuilder()
30 | .title("ADEL API")
31 | .description("REST API of ADEL. More information on [Github](https://github.com/jplu/ADEL)")
32 | .license("Apache 2.0")
33 | .licenseUrl("http://www.apache.org/licenses/LICENSE-2.0.html")
34 | .termsOfServiceUrl("https://github.com/jplu/ADEL")
35 | .version("2.0.0")
36 | .contact(new Contact("Julien Plu", "https://jplu.github.io", ""))
37 | .build();
38 | }
39 |
40 | @Bean
41 | public Docket apiDocket(final ServletContext servletContext, @Value("${openapi.ADEL.base-path:/}") final String basePath) {
42 | return new Docket(DocumentationType.SWAGGER_2)
43 | .select()
44 | .apis(RequestHandlerSelectors.basePackage("fr.eurecom.adel.api"))
45 | .build()
46 | .pathProvider(new BasePathAwareRelativePathProvider(servletContext, basePath))
47 | .apiInfo(this.apiInfo());
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/controllers/EntityRecognition.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.controllers;
2 |
3 | import fr.eurecom.adel.api.formatter.DocumentFormatter;
4 | import fr.eurecom.adel.api.formatter.ProfileFormatter;
5 | import fr.eurecom.adel.commons.datatypes.Document;
6 | import fr.eurecom.adel.api.converters.DocumentConverter;
7 | import fr.eurecom.adel.commons.exceptions.NIFMalformedException;
8 | import fr.eurecom.adel.commons.formats.NIF;
9 | import fr.eurecom.adel.recognition.exceptions.MappingNotExistsException;
10 | import fr.eurecom.adel.recognition.exceptions.TypeNotExistsException;
11 | import fr.eurecom.adel.recognition.usecases.RecognitionPipeline;
12 | import io.swagger.annotations.Api;
13 | import io.swagger.annotations.ApiOperation;
14 | import io.swagger.annotations.ApiParam;
15 | import io.swagger.annotations.ApiResponse;
16 | import io.swagger.annotations.ApiResponses;
17 |
18 | import org.springframework.beans.factory.annotation.Autowired;
19 | import org.springframework.http.HttpStatus;
20 | import org.springframework.http.MediaType;
21 | import org.springframework.http.ResponseEntity;
22 | import org.springframework.web.bind.annotation.GetMapping;
23 | import org.springframework.web.bind.annotation.PostMapping;
24 | import org.springframework.web.bind.annotation.RequestBody;
25 | import org.springframework.web.bind.annotation.RequestMapping;
26 | import org.springframework.web.bind.annotation.RestController;
27 | import org.springframework.web.server.ResponseStatusException;
28 |
29 | import java.util.List;
30 |
31 | /**
32 | * @author Julien Plu on 17/11/2018.
33 | */
34 |
35 | @RestController
36 | @RequestMapping("/api/v2/recognize")
37 | @Api
38 | public class EntityRecognition {
39 | private RecognitionPipeline pipeline;
40 |
41 | @Autowired
42 | public final void setPipeline(final RecognitionPipeline newPipeline) {
43 | this.pipeline = newPipeline;
44 | }
45 |
46 | @ApiOperation(value = "Get current entity recognition profile", notes = "Get current entity recognition profile", response = ProfileFormatter.class, tags = "entity-recognition")
47 | @ApiResponses(@ApiResponse(code = 200, message = "The process went well", response = ProfileFormatter.class))
48 | @GetMapping(value = "/profile", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
49 | public final ProfileFormatter profile() {
50 | return new ProfileFormatter(this.pipeline.getConfig());
51 | }
52 |
53 | @ApiOperation(value = "Entity recognition over a text", notes = "Entity recognition over a text", response = DocumentFormatter.class, tags = "entity-recognition")
54 | @ApiResponses(@ApiResponse(code = 200, message = "The process went well", response = DocumentFormatter.class))
55 | @PostMapping(value = "", consumes = MediaType.APPLICATION_JSON_UTF8_VALUE, produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
56 | public final DocumentFormatter recognize(@ApiParam(value = "Input of the recognize endpoint", required=true) @RequestBody final DocumentConverter documentConverter) {
57 | if (documentConverter.getText() == null) {
58 | throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "The text property is missing");
59 | }
60 |
61 | final Document document;
62 |
63 | try {
64 | document = this.pipeline.run(documentConverter.toDocument().getText()).get("adel");
65 | } catch (final MappingNotExistsException | TypeNotExistsException ex) {
66 | throw new ResponseStatusException(HttpStatus.INTERNAL_SERVER_ERROR, ex.getMessage(), ex);
67 | }
68 |
69 | return new DocumentFormatter(document);
70 | }
71 |
72 | @ApiOperation(value = "Entity recognition over a NIF content", nickname = "recognizeNIF", notes = "Entity recognition over a NIF content")
73 | @ApiResponses(@ApiResponse(code = 200, message = "The process went well"))
74 | @PostMapping(value = "/nif", consumes = "application/x-turtle;charset=utf-8", produces = "application/x-turtle;charset=utf-8")
75 | public final String recognizeNIF(@ApiParam(required = true) @RequestBody final String request) {
76 | final NIF nif = new NIF();
77 |
78 | try {
79 | nif.setNIF(request);
80 |
81 | final List documents = nif.documents();
82 |
83 | for (final String doc : documents) {
84 | nif.addDocument(this.pipeline.run(doc).get("adel"));
85 | }
86 | } catch (final NIFMalformedException ex) {
87 | throw new ResponseStatusException(HttpStatus.BAD_REQUEST, ex.getMessage(), ex);
88 | } catch (final MappingNotExistsException | TypeNotExistsException ex) {
89 | throw new ResponseStatusException(HttpStatus.INTERNAL_SERVER_ERROR, ex.getMessage(), ex);
90 | }
91 |
92 | return nif.stringOutput();
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/controllers/Utilities.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.controllers;
2 |
3 | import org.springframework.http.HttpStatus;
4 | import org.springframework.http.ResponseEntity;
5 | import org.springframework.ui.ModelMap;
6 | import org.springframework.web.bind.annotation.GetMapping;
7 | import org.springframework.web.bind.annotation.RestController;
8 | import org.springframework.web.servlet.ModelAndView;
9 |
10 | import springfox.documentation.annotations.ApiIgnore;
11 |
12 | /**
13 | * @author Julien Plu on 2019-03-19.
14 | */
15 | @ApiIgnore
16 | @RestController
17 | public class Utilities {
18 | @GetMapping("/")
19 | public final ModelAndView index(final ModelMap model) {
20 | return new ModelAndView("redirect:/swagger-ui.html", model);
21 | }
22 |
23 | @GetMapping("/health")
24 | public final ResponseEntity healthcheck() {
25 | return new ResponseEntity(HttpStatus.OK);
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/converters/DocumentConverter.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.converters;
2 |
3 | import javax.validation.constraints.NotNull;
4 |
5 | import fr.eurecom.adel.commons.datatypes.Document;
6 | import lombok.Getter;
7 | import lombok.Setter;
8 |
9 | /**
10 | * @author Julien Plu on 17/11/2018.
11 | */
12 | @Getter
13 | @Setter
14 | public class DocumentConverter {
15 | private @NotNull String text;
16 |
17 | public final Document toDocument() {
18 | return Document.builder().text(this.text).build();
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/formatter/DocumentFormatter.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.formatter;
2 |
3 | import java.util.List;
4 | import java.util.stream.Collectors;
5 |
6 | import fr.eurecom.adel.commons.datatypes.Document;
7 | import fr.eurecom.adel.commons.datatypes.Entity;
8 | import fr.eurecom.adel.commons.datatypes.Token;
9 | import lombok.Getter;
10 | import lombok.Setter;
11 |
12 | /**
13 | * @author Julien Plu on 2019-02-09.
14 | */
15 | @Getter
16 | @Setter
17 | public class DocumentFormatter {
18 | private List entities;
19 |
20 | public DocumentFormatter(final Document newDocument) {
21 | this.entities = newDocument.getEntities();
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/formatter/ProfileFormatter.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.formatter;
2 |
3 | import fr.eurecom.adel.recognition.configuration.RecognitionConfig;
4 | import lombok.Getter;
5 | import lombok.Setter;
6 |
7 | /**
8 | * @author Julien Plu on 2019-02-12.
9 | */
10 | @Getter
11 | @Setter
12 | public class ProfileFormatter {
13 | private final RecognitionProfile recognition;
14 |
15 | public ProfileFormatter(final RecognitionConfig newRecognitionProfile) {
16 | this.recognition = new RecognitionProfile(newRecognitionProfile);
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/adel-api/src/main/java/fr/eurecom/adel/api/formatter/RecognitionProfile.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.api.formatter;
2 |
3 | import java.util.List;
4 |
5 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
6 | import fr.eurecom.adel.recognition.configuration.RecognitionConfig;
7 | import fr.eurecom.adel.recognition.configuration.TweetNormalizationConfig;
8 | import fr.eurecom.adel.recognition.configuration.TypeOverlappingConfig;
9 | import lombok.Getter;
10 | import lombok.Setter;
11 |
12 | /**
13 | * @author Julien Plu on 2019-02-12.
14 | */
15 | @Getter
16 | @Setter
17 | public class RecognitionProfile {
18 | private final List annotators;
19 | private final TypeOverlappingConfig typeOverlapping;
20 | private final TweetNormalizationConfig tweetNormalization;
21 | private final String mentionOverlapping;
22 |
23 | public RecognitionProfile(final RecognitionConfig newRecognitionConfig) {
24 | this.annotators = newRecognitionConfig.getAnnotators();
25 | this.typeOverlapping = newRecognitionConfig.getTypeoverlapping();
26 | this.tweetNormalization = newRecognitionConfig.getTweetnormalization();
27 | this.mentionOverlapping = newRecognitionConfig.getMentionoverlapping();
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/adel-api/src/main/resources/bootstrap.yaml:
--------------------------------------------------------------------------------
1 | spring:
2 | cloud:
3 | config:
4 | uri: ${CONFIG_URI:http://localhost:8888}
5 | failFast: true
6 | retry:
7 | initialInterval: 3000
8 | multiplier: 1.3
9 | maxInterval: 5000
10 | maxAttempts: 20
11 | application:
12 | name: adel
13 | eureka:
14 | client:
15 | serviceUrl:
16 | defaultZone: ${EUREKA_URI:http://localhost:8761/eureka}
--------------------------------------------------------------------------------
/adel-commons/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | adel-commons
6 | ${revision}
7 |
8 |
9 |
10 | org.apache.maven.plugins
11 | maven-compiler-plugin
12 |
13 | 13
14 | 13
15 |
16 |
17 |
18 |
19 | adel-commons
20 | http://adel.eurecom.fr
21 |
22 |
23 | fr.eurecom.adel
24 | adel-pom
25 | ${revision}
26 |
27 |
28 |
29 |
30 | org.springframework.boot
31 | spring-boot-starter
32 |
33 |
34 |
35 | org.springframework.boot
36 | spring-boot-configuration-processor
37 | true
38 |
39 |
40 |
41 | org.reflections
42 | reflections
43 | ${reflections.version}
44 |
45 |
46 |
47 | org.projectlombok
48 | lombok
49 | ${lombok.version}
50 | provided
51 |
52 |
53 |
54 | org.hibernate
55 | hibernate-validator
56 | ${hibernate.validator.version}
57 |
58 |
59 |
60 | com.twitter.twittertext
61 | twitter-text
62 | ${twitter.text.version}
63 |
64 |
65 |
66 | com.vdurmont
67 | emoji-java
68 | ${emoji.java.version}
69 |
70 |
71 |
72 | org.javatuples
73 | javatuples
74 | ${javatuples.version}
75 |
76 |
77 |
78 | de.vandermeer
79 | asciitable
80 | ${asciitable.version}
81 |
82 |
83 |
84 | org.apache.jena
85 | apache-jena-libs
86 | pom
87 | ${apache.jena.libs.version}
88 |
89 |
90 |
91 | org.json
92 | json
93 | ${json.version}
94 |
95 |
96 |
97 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/datatypes/Document.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.datatypes;
2 |
3 | import java.util.List;
4 |
5 | import javax.validation.constraints.NotNull;
6 |
7 | import lombok.Builder;
8 | import lombok.Getter;
9 | import lombok.Setter;
10 | import lombok.ToString;
11 |
12 | /**
13 | * @author Julien Plu on 17/11/2018.
14 | */
15 | @Getter
16 | @Setter
17 | @ToString
18 | @Builder
19 | public final class Document {
20 | private final @NotNull String text;
21 | private List entities;
22 | private List> tokens;
23 | }
24 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/datatypes/Entity.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.datatypes;
2 |
3 | import javax.validation.constraints.NotNull;
4 |
5 | import lombok.Builder;
6 | import lombok.Getter;
7 | import lombok.Setter;
8 | import lombok.ToString;
9 |
10 | /**
11 | * @author Julien Plu on 17/11/2018.
12 | */
13 | @Getter
14 | @Setter
15 | @ToString
16 | @Builder
17 | public final class Entity {
18 | private @NotNull String phrase;
19 | private @NotNull String cleanPhrase;
20 | private @NotNull String type;
21 | private @NotNull Integer startOffset;
22 | private @NotNull Integer endOffset;
23 | }
24 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/datatypes/Sentence.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.datatypes;
2 |
3 | import java.util.List;
4 |
5 | import lombok.Getter;
6 | import lombok.Setter;
7 |
8 | /**
9 | * @author Julien Plu on 2019-02-15.
10 | */
11 | @Getter
12 | @Setter
13 | public class Sentence {
14 | private List tokens;
15 | }
16 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/datatypes/Token.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.datatypes;
2 |
3 | import lombok.Builder;
4 | import lombok.Getter;
5 | import lombok.Setter;
6 |
7 | /**
8 | * @author Julien Plu on 2019-02-09.
9 | */
10 | @Getter
11 | @Setter
12 | @Builder
13 | public class Token {
14 | private String value;
15 | private Integer begin;
16 | private Integer end;
17 | }
18 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/datatypes/TweetEntity.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.datatypes;
2 |
3 | import javax.validation.constraints.NotNull;
4 |
5 | import lombok.Builder;
6 | import lombok.Getter;
7 | import lombok.Setter;
8 | import lombok.ToString;
9 |
10 | /**
11 | * @author Julien Plu on 2019-01-05.
12 | */
13 | @Getter
14 | @Setter
15 | @ToString
16 | @Builder
17 | public class TweetEntity {
18 | private @NotNull String phrase;
19 | private String cleanPhrase;
20 | private @NotNull Integer startOffset;
21 | private @NotNull Integer endOffset;
22 | }
23 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/exceptions/CoNLLMalformedException.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.exceptions;
2 |
/**
 * Checked exception reporting CoNLL content that does not follow the expected
 * layout.
 *
 * @author Julien Plu on 2019-02-18.
 */
public class CoNLLMalformedException extends Exception {
  /**
   * Creates the exception with a message and an underlying cause.
   *
   * @param errorMessage description of the problem
   * @param err          failure that led to this exception
   */
  public CoNLLMalformedException(final String errorMessage, final Throwable err) {
    super(errorMessage, err);
  }

  /**
   * Creates the exception with a message only.
   *
   * @param errorMessage description of the problem
   */
  public CoNLLMalformedException(final String errorMessage) {
    super(errorMessage);
  }
}
15 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/exceptions/NIFMalformedException.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.exceptions;
2 |
/**
 * Checked exception reporting NIF content that does not follow the expected
 * layout.
 *
 * @author Julien Plu on 2019-02-28.
 */
public class NIFMalformedException extends Exception {
  /**
   * Creates the exception with a message and an underlying cause.
   *
   * @param errorMessage description of the problem
   * @param err          failure that led to this exception
   */
  public NIFMalformedException(final String errorMessage, final Throwable err) {
    super(errorMessage, err);
  }

  /**
   * Creates the exception with a message only.
   *
   * @param errorMessage description of the problem
   */
  public NIFMalformedException(final String errorMessage) {
    super(errorMessage);
  }
}
15 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/exceptions/TACMalformedException.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.exceptions;
2 |
/**
 * Checked exception reporting TAC content that does not follow the expected
 * layout.
 *
 * @author Julien Plu on 2019-03-02.
 */
public class TACMalformedException extends Exception {
  /**
   * Creates the exception with a message and an underlying cause.
   *
   * @param errorMessage description of the problem
   * @param err          failure that led to this exception
   */
  public TACMalformedException(final String errorMessage, final Throwable err) {
    super(errorMessage, err);
  }

  /**
   * Creates the exception with a message only.
   *
   * @param errorMessage description of the problem
   */
  public TACMalformedException(final String errorMessage) {
    super(errorMessage);
  }
}
15 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/formats/CoNLL.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.formats;
2 |
3 | import org.javatuples.Triplet;
4 |
5 | import java.io.IOException;
6 | import java.nio.charset.Charset;
7 | import java.nio.file.Files;
8 | import java.nio.file.OpenOption;
9 | import java.nio.file.Path;
10 | import java.nio.file.Paths;
11 | import java.nio.file.StandardOpenOption;
12 | import java.util.ArrayList;
13 | import java.util.Arrays;
14 | import java.util.Collection;
15 | import java.util.Collections;
16 | import java.util.HashMap;
17 | import java.util.List;
18 | import java.util.Map;
19 |
20 | import fr.eurecom.adel.commons.datatypes.Document;
21 | import fr.eurecom.adel.commons.datatypes.Entity;
22 | import fr.eurecom.adel.commons.datatypes.Token;
23 | import fr.eurecom.adel.commons.exceptions.CoNLLMalformedException;
24 | import fr.eurecom.adel.commons.utils.ScoringUtils;
25 | import lombok.extern.slf4j.Slf4j;
26 |
27 | /**
28 | * @author Julien Plu on 2019-02-15.
29 | */
30 | @Slf4j
31 | public class CoNLL {
32 | private Path input;
33 | private Path output;
34 | private final List> conllDocuments;
35 | private final List> conllLabels;
36 | private final List> conllGoldLabels;
37 | private String conllContent;
38 |
39 | public CoNLL(final String newInput, final String newOutput, final boolean is2003Format) throws CoNLLMalformedException, IOException {
40 | this.input = Paths.get(newInput);
41 | this.output = Paths.get(newOutput);
42 | this.conllDocuments = new ArrayList<>();
43 | this.conllLabels = new ArrayList<>();
44 | this.conllGoldLabels = new ArrayList<>();
45 |
46 | this.read(false, is2003Format);
47 | }
48 |
49 | public CoNLL(final String newInput, final boolean isEval, final boolean is2003Format) throws CoNLLMalformedException, IOException {
50 | this.input = Paths.get(newInput);
51 | this.conllDocuments = new ArrayList<>();
52 | this.conllLabels = new ArrayList<>();
53 | this.conllGoldLabels = new ArrayList<>();
54 |
55 | this.read(isEval, is2003Format);
56 | }
57 |
58 | public CoNLL(final String newOutput) {
59 | this.output = Paths.get(newOutput);
60 | this.conllDocuments = new ArrayList<>();
61 | this.conllLabels = new ArrayList<>();
62 | this.conllGoldLabels = new ArrayList<>();
63 | }
64 |
65 | public CoNLL() {
66 | this.conllDocuments = new ArrayList<>();
67 | this.conllLabels = new ArrayList<>();
68 | this.conllGoldLabels = new ArrayList<>();
69 | }
70 |
71 | public final List documents() {
72 | final List documentsAsText = new ArrayList<>();
73 |
74 | for (final List doc : this.conllDocuments) {
75 | documentsAsText.add(String.join(" ", doc));
76 | }
77 |
78 | return documentsAsText;
79 | }
80 |
81 | private void read(final boolean isEval, final boolean is2003Format) throws CoNLLMalformedException, IOException {
82 | final List lines = Files.readAllLines(this.input, Charset.forName("UTF-8"));
83 |
84 | if (!lines.get(lines.size() - 1).isEmpty()) {
85 | throw new CoNLLMalformedException("Malformed CoNLL: new line missing at the end of the file " + this.input);
86 | }
87 |
88 | if (is2003Format && isEval) {
89 | throw new CoNLLMalformedException("Cannot evaluate a CoNLL02/CoNLL03 formats");
90 | }
91 |
92 | int count = 2;
93 |
94 | if (isEval) {
95 | count = 3;
96 | } else if (is2003Format) {
97 | count = 4;
98 | }
99 |
100 | List conllDocument = new ArrayList<>();
101 | List conllGoldLabelsInDocument = new ArrayList<>();
102 | final List conllLabelsInDocuments = new ArrayList<>();
103 | final Collection words = new ArrayList<>();
104 | final Collection goldLabels = new ArrayList<>();
105 | final Collection labels = new ArrayList<>();
106 | int lineNumber = 0;
107 |
108 | for (final String line : lines) {
109 | lineNumber++;
110 | final String contents = line.trim();
111 | final String[] tokens = contents.split(" ");
112 |
113 | if (tokens.length == count) {
114 | if ("-DOCSTART-".equals(tokens[0]) && !conllDocument.isEmpty()) {
115 | this.conllDocuments.add(conllDocument);
116 | this.conllGoldLabels.add(conllGoldLabelsInDocument);
117 |
118 | conllDocument = new ArrayList<>();
119 | conllGoldLabelsInDocument = new ArrayList<>();
120 | } else if (!"-DOCSTART-".equals(tokens[0])) {
121 | words.add(tokens[0]);
122 |
123 | if (isEval) {
124 | labels.add(tokens[1].replaceAll("[I|B]-", ""));
125 | goldLabels.add(tokens[2].replaceAll("[I|B]-", ""));
126 | } else if (is2003Format) {
127 | goldLabels.add(tokens[3].replaceAll("[I|B]-", ""));
128 | } else {
129 | goldLabels.add(tokens[1].replaceAll("[I|B]-", ""));
130 | }
131 | }
132 | } else if (contents.isEmpty()) {
133 | if (!words.isEmpty()) {
134 | conllDocument.add(String.join(" ", words));
135 | conllGoldLabelsInDocument.add(String.join(" ", goldLabels));
136 | words.clear();
137 | goldLabels.clear();
138 |
139 | if (isEval) {
140 | conllLabelsInDocuments.add(String.join(" ", labels));
141 | labels.clear();
142 | }
143 | }
144 | } else {
145 | throw new CoNLLMalformedException("Malformed CoNLL: the line " + lineNumber + " in file " + this.input + " has " + contents.split(" ").length + " columns instead of " + count);
146 | }
147 | }
148 |
149 | this.conllDocuments.add(conllDocument);
150 | this.conllGoldLabels.add(conllGoldLabelsInDocument);
151 |
152 | if (isEval) {
153 | this.conllLabels.add(conllLabelsInDocuments);
154 | }
155 | }
156 |
157 | public final void addDocument(final Document document) {
158 | final List conllLabelsInDocument = new ArrayList<>();
159 | final List conllDocument = new ArrayList<>();
160 |
161 | for (final List sentence : document.getTokens() ) {
162 | final Collection conllSentence = new ArrayList<>();
163 | final Collection conllLabelsInSentence = new ArrayList<>();
164 |
165 | for (final Token token : sentence) {
166 | boolean found = false;
167 |
168 | for (final Entity entity : document.getEntities()) {
169 | if (token.getBegin() >= entity.getStartOffset() && token.getEnd() <= entity.getEndOffset()) {
170 | found = true;
171 |
172 | conllLabelsInSentence.add(entity.getType());
173 | break;
174 | }
175 | }
176 |
177 | if (!found) {
178 | conllLabelsInSentence.add("O");
179 | }
180 |
181 | if (this.conllGoldLabels.isEmpty()) {
182 | conllSentence.add(token.getValue());
183 | }
184 | }
185 |
186 | conllLabelsInDocument.add(String.join(" ", conllLabelsInSentence));
187 |
188 | if (this.conllGoldLabels.isEmpty()) {
189 | conllDocument.add(String.join(" ", conllSentence));
190 | }
191 | }
192 |
193 | this.conllLabels.add(conllLabelsInDocument);
194 |
195 | if (this.conllGoldLabels.isEmpty()) {
196 | this.conllDocuments.add(conllDocument);
197 | }
198 | }
199 |
200 | private String toCoNLLText() {
201 | final StringBuilder sb = new StringBuilder();
202 |
203 | for (int i = 0;i < this.conllDocuments.size();i++) {
204 | if (this.conllGoldLabels.isEmpty()) {
205 | sb.append("-DOCSTART- O");
206 | } else {
207 | sb.append("-DOCSTART- O O");
208 | }
209 |
210 | sb.append(System.lineSeparator());
211 | sb.append(System.lineSeparator());
212 |
213 | for (int j = 0; j < this.conllDocuments.get(i).size(); j++) {
214 | final List labels = Arrays.asList(this.conllLabels.get(i).get(j).split(" "));
215 | final List tokens = Arrays.asList(this.conllDocuments.get(i).get(j).split(" "));
216 | final List> lines;
217 |
218 | if (this.conllGoldLabels.isEmpty()) {
219 | lines = this.zip(tokens, labels);
220 | } else {
221 | final List goldLabels = Arrays.asList(this.conllGoldLabels.get(i).get(j).split(" "));
222 |
223 | lines = this.zip(tokens, labels, goldLabels);
224 | }
225 |
226 | for (final List line : lines) {
227 | sb.append(String.join(" ", line));
228 | sb.append(System.lineSeparator());
229 | }
230 |
231 | sb.append(System.lineSeparator());
232 | }
233 | }
234 |
235 | return sb.toString();
236 | }
237 |
238 | public final void write(final boolean print) throws IOException {
239 | this.conllContent = this.toCoNLLText();
240 |
241 | if (print) {
242 | CoNLL.log.info("{}{}", System.lineSeparator(), this.conllContent);
243 | }
244 |
245 | if (this.output != null) {
246 | Files.writeString(this.output, this.conllContent);
247 | }
248 | }
249 |
250 | public final void scorerExtraction() {
251 | // Precision = TP / (TP + FP)
252 | // Rappel = TP / (TP + FN)
253 | // F1 = 2 . (P . R) / (P + R)
254 | // ACC = (TP + TN) / (TP + TN + FP + FN)
255 | // Triplet
256 | if (this.conllContent == null) {
257 | this.conllContent = this.toCoNLLText();
258 | }
259 |
260 | final String[] lines = this.conllContent.split(System.lineSeparator());
261 | final Map> annotations = new HashMap<>();
262 |
263 | for (final String line : lines) {
264 | final String[] columns = line.split(" ");
265 |
266 | if (columns.length == 3 && !"-DOCSTART-".equals(columns[0])) {
267 | if (!"O".equals(columns[2]) && !"O".equals(columns[1])) {
268 | annotations.computeIfPresent("", (key, val) -> val.setAt0(val.getValue0() + 1));
269 | annotations.computeIfAbsent("", x -> Triplet.with(1, 0, 0));
270 | } else {
271 | if (!"O".equals(columns[2])) {
272 | annotations.computeIfPresent("", (key, val) -> val.setAt2(val.getValue2() + 1));
273 | annotations.computeIfAbsent("", x -> Triplet.with(0, 0, 1));
274 | }
275 |
276 | if (!"O".equals(columns[1])) {
277 | annotations.computeIfPresent("", (key, val) -> val.setAt1(val.getValue1() + 1));
278 | annotations.computeIfAbsent("", x -> Triplet.with(0, 1, 0));
279 | }
280 | }
281 | }
282 | }
283 |
284 | CoNLL.log.info("{}{}", System.lineSeparator(), ScoringUtils.scoreAnnotations(annotations, "Extraction"));
285 | }
286 |
287 | public final void scorerNER() {
288 | // Precision = TP / (TP + FP)
289 | // Rappel = TP / (TP + FN)
290 | // F1 = 2 . (P . R) / (P + R)
291 | // ACC = (TP + TN) / (TP + TN + FP + FN)
292 | // Triplet
293 | if (this.conllContent == null) {
294 | this.conllContent = this.toCoNLLText();
295 | }
296 |
297 | final String[] lines = this.conllContent.split(System.lineSeparator());
298 | final Map> annotations = new HashMap<>();
299 |
300 | for (final String line : lines) {
301 | final String[] columns = line.split(" ");
302 |
303 | if (columns.length == 3 && !"-DOCSTART-".equals(columns[0])) {
304 | if (columns[1].equals(columns[2]) && !"O".equals(columns[1])) {
305 | annotations.computeIfPresent(columns[2], (key, val) -> val.setAt0(val.getValue0() + 1));
306 | annotations.computeIfAbsent(columns[2], x -> Triplet.with(1, 0, 0));
307 | } else {
308 | if (!"O".equals(columns[2])) {
309 | annotations.computeIfPresent(columns[2], (key, val) -> val.setAt2(val.getValue2() + 1));
310 | annotations.computeIfAbsent(columns[2], x -> Triplet.with(0, 0, 1));
311 | }
312 |
313 | if (!"O".equals(columns[1])) {
314 | annotations.computeIfPresent(columns[1], (key, val) -> val.setAt1(val.getValue1() + 1));
315 | annotations.computeIfAbsent(columns[1], x -> Triplet.with(0, 1, 0));
316 | }
317 | }
318 | }
319 | }
320 |
321 | CoNLL.log.info("{}{}", System.lineSeparator(), ScoringUtils.scoreAnnotations(annotations, "Recognition"));
322 | }
323 |
324 | @SafeVarargs
325 | private List> zip(final List... lists) {
326 | final List> zipped = new ArrayList<>();
327 |
328 | for (final List list : lists) {
329 | for (int i = 0, listSize = list.size(); i < listSize; i++) {
330 | final List list2;
331 |
332 | if (i >= zipped.size()) {
333 | zipped.add(list2 = new ArrayList<>());
334 | } else {
335 | list2 = zipped.get(i);
336 | }
337 |
338 | list2.add(list.get(i));
339 | }
340 | }
341 |
342 | return zipped;
343 | }
344 | }
345 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/formats/TAC.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.formats;
2 |
3 | import org.javatuples.Pair;
4 |
5 | import java.io.IOException;
6 | import java.nio.file.Files;
7 | import java.nio.file.Path;
8 | import java.nio.file.Paths;
9 | import java.util.ArrayList;
10 | import java.util.Arrays;
11 | import java.util.Collections;
12 | import java.util.HashMap;
13 | import java.util.List;
14 | import java.util.Map;
15 | import java.util.UUID;
16 |
17 | import fr.eurecom.adel.commons.datatypes.Document;
18 | import fr.eurecom.adel.commons.datatypes.Entity;
19 | import fr.eurecom.adel.commons.exceptions.TACMalformedException;
20 | import fr.eurecom.adel.commons.utils.ScoringUtils;
21 | import lombok.extern.slf4j.Slf4j;
22 |
23 | /**
24 | * @author Julien Plu on 2019-03-02.
25 | */
26 | @Slf4j
27 | public class TAC {
28 | private Path annotationsInput;
29 | private Path textsInput;
30 | private Path annotationsOutput;
31 | private Path textsOutput;
32 | private Map>> goldDocuments;
33 | private Map>> annotatedDocuments;
34 |
35 | public TAC(final String newAnnotationsInput, final String newTextsInput, final String newTextsOutput, final String newAnnotationsOutput) throws IOException, TACMalformedException {
36 | this.annotationsInput = Paths.get(newAnnotationsInput);
37 | this.textsInput = Paths.get(newTextsInput);
38 | this.annotationsOutput = Paths.get(newAnnotationsOutput);
39 | this.textsOutput = Paths.get(newTextsOutput);
40 | this.goldDocuments = new HashMap<>();
41 | this.annotatedDocuments = new HashMap<>();
42 |
43 | this.read("");
44 | }
45 |
46 | public TAC(final String newTextsOutput, final String newAnnotationsOutput) {
47 | this.textsOutput = Paths.get(newTextsOutput);
48 | this.annotationsOutput = Paths.get(newAnnotationsOutput);
49 | this.goldDocuments = new HashMap<>();
50 | this.annotatedDocuments = new HashMap<>();
51 | }
52 |
53 | public TAC(final String newTextsInput) throws IOException, TACMalformedException {
54 | this.textsInput = Paths.get(newTextsInput);
55 | this.goldDocuments = new HashMap<>();
56 | this.annotatedDocuments = new HashMap<>();
57 |
58 | this.read("");
59 | }
60 |
61 | public TAC() {
62 | this.goldDocuments = new HashMap<>();
63 | this.annotatedDocuments = new HashMap<>();
64 | }
65 |
66 | public final void setAnnotationsTextInput(final String newTextInput, final String newAnnotationsInput) throws IOException, TACMalformedException {
67 | this.textsInput = Paths.get(newTextInput);
68 |
69 | this.read(newAnnotationsInput);
70 | }
71 |
72 | public final void setAnnotationsOutput(final String newAnnotationsOutput) {
73 | this.annotationsOutput = Paths.get(newAnnotationsOutput);
74 | }
75 |
76 | public final Map>> goldAnnotations() {
77 | return Collections.unmodifiableMap(this.goldDocuments);
78 | }
79 |
80 | public final void setAnnotations(final Map>> annotations) {
81 | this.annotatedDocuments = new HashMap<>(annotations);
82 | }
83 |
84 | private void read(final String gold) throws IOException, TACMalformedException {
85 | List annotationsLines = new ArrayList<>();
86 |
87 | if (gold.isEmpty() && null != this.annotationsInput) {
88 | annotationsLines = Files.readAllLines(this.annotationsInput);
89 | } else if (!gold.isEmpty()) {
90 | annotationsLines = Files.readAllLines(Paths.get(gold));
91 | }
92 |
93 | final Map tmpTexts = new HashMap<>();
94 | int lineCount = 0;
95 |
96 | if (null != this.textsInput) {
97 | final List textsLines = Files.readAllLines(this.textsInput);
98 |
99 | for (final String textLine : textsLines) {
100 | lineCount++;
101 | final String[] columns = textLine.split("\t");
102 |
103 | if (2 != columns.length) {
104 | throw new TACMalformedException("Malformed TAC file: the line " + lineCount + " in file " + this.textsInput + " has " + columns.length + " columns instead of 2");
105 | }
106 |
107 | tmpTexts.put(columns[0], columns[1]);
108 | }
109 | }
110 |
111 | lineCount = 0;
112 |
113 | for (final String annotationLine : annotationsLines) {
114 | lineCount++;
115 | final String[] columns = annotationLine.split("\t");
116 |
117 | if (6 != columns.length) {
118 | throw new TACMalformedException("Malformed TAC file: the line " + lineCount + " in file " + this.annotationsInput + " has " + columns.length + " columns instead of 6");
119 | }
120 |
121 | if (!tmpTexts.containsKey(columns[0]) && null != this.textsInput) {
122 | throw new TACMalformedException("Malformed TAC file: the document ID " + columns[0] + " at line " + lineCount + " in file " + this.annotationsInput + " does not exists in " + this.textsInput);
123 | }
124 |
125 | String mention = "";
126 |
127 | if (!tmpTexts.isEmpty()) {
128 | mention = tmpTexts.get(columns[0]).substring(Integer.parseInt(columns[1]), Integer.parseInt(columns[2]));
129 | }
130 |
131 | final Entity entity = Entity.builder().phrase(mention)
132 | .cleanPhrase(mention)
133 | .startOffset(Integer.parseInt(columns[1]))
134 | .endOffset(Integer.parseInt(columns[2]))
135 | .type(columns[5]).build();
136 |
137 | if (gold.isEmpty()) {
138 | if (this.annotatedDocuments.containsKey(tmpTexts.get(columns[0]))) {
139 | this.annotatedDocuments.get(tmpTexts.get(columns[0])).getValue1().add(entity);
140 | } else {
141 | this.annotatedDocuments.put(tmpTexts.get(columns[0]), Pair.with(columns[0], new ArrayList<>(Collections.singleton(entity))));
142 | }
143 | } else {
144 | if (this.goldDocuments.containsKey(tmpTexts.get(columns[0]))) {
145 | this.goldDocuments.get(tmpTexts.get(columns[0])).getValue1().add(entity);
146 | } else {
147 | this.goldDocuments.put(tmpTexts.get(columns[0]), Pair.with(columns[0], new ArrayList<>(Collections.singleton(entity))));
148 | }
149 | }
150 | }
151 |
152 | if (this.goldDocuments.isEmpty() && !tmpTexts.isEmpty()) {
153 | for (final Map.Entry entry : tmpTexts.entrySet()) {
154 | this.goldDocuments.put(entry.getValue(), Pair.with(entry.getKey(), new ArrayList<>()));
155 | }
156 | }
157 | }
158 |
159 | public final List documents () {
160 | return new ArrayList<>(this.goldDocuments.keySet());
161 | }
162 |
163 | public final void addDocument(final Document document) {
164 | final String id;
165 |
166 | if (this.goldDocuments.containsKey(document.getText())) {
167 | id = this.goldDocuments.get(document.getText()).getValue0();
168 | } else {
169 | id = UUID.randomUUID().toString();
170 | }
171 |
172 | if (!this.annotatedDocuments.containsKey(document.getText())) {
173 | this.annotatedDocuments.put(document.getText(), Pair.with(id, document.getEntities()));
174 | }
175 | }
176 |
177 | public final void write(final boolean print) throws IOException {
178 | final StringBuilder linesText = new StringBuilder();
179 | final StringBuilder linesAnnotations = new StringBuilder();
180 |
181 | for (final Map.Entry>> entry : this.annotatedDocuments.entrySet()) {
182 | linesText.append(entry.getValue().getValue0());
183 | linesText.append('\t');
184 | linesText.append(entry.getKey());
185 | linesText.append(System.lineSeparator());
186 |
187 | for (final Entity entity : entry.getValue().getValue1()) {
188 | linesAnnotations.append(entry.getValue().getValue0());
189 | linesAnnotations.append('\t');
190 | linesAnnotations.append(entity.getStartOffset());
191 | linesAnnotations.append('\t');
192 | linesAnnotations.append(entity.getEndOffset());
193 | linesAnnotations.append('\t');
194 | linesAnnotations.append('-');
195 | linesAnnotations.append('\t');
196 | linesAnnotations.append("0.0");
197 | linesAnnotations.append('\t');
198 | linesAnnotations.append(entity.getType());
199 | linesAnnotations.append(System.lineSeparator());
200 | }
201 | }
202 |
203 | if (print) {
204 | TAC.log.info("{}{}", System.lineSeparator(), linesAnnotations);
205 | TAC.log.info("{}{}", System.lineSeparator(), linesText);
206 | }
207 |
208 | if (null != this.annotationsOutput) {
209 | Files.write(this.annotationsOutput, Arrays.asList(linesAnnotations.toString().split(System.lineSeparator())));
210 | }
211 |
212 | if (null != this.textsOutput) {
213 | Files.write(this.textsOutput, Arrays.asList(linesText.toString().split(System.lineSeparator())));
214 | }
215 | }
216 |
217 | public final void scorerExtraction(final String inputAnnotationsFile, final String gold) throws IOException, TACMalformedException {
218 | if (!inputAnnotationsFile.isEmpty()) {
219 | this.annotationsInput = Paths.get(inputAnnotationsFile);
220 |
221 | this.read("");
222 | }
223 |
224 | this.read(gold);
225 |
226 | TAC.log.info("{}{}", System.lineSeparator(), ScoringUtils.scoreExtractionNIFAndTAC(this.goldDocuments, this.annotatedDocuments));
227 | }
228 |
229 | public final void scorerNER(final String inputAnnotationsFile, final String gold) throws IOException, TACMalformedException {
230 | if (!inputAnnotationsFile.isEmpty()) {
231 | this.annotationsInput = Paths.get(inputAnnotationsFile);
232 |
233 | this.read("");
234 | }
235 |
236 | this.read(gold);
237 |
238 | TAC.log.info("{}{}", System.lineSeparator(), ScoringUtils.scoreNERNIFAndTAC(this.goldDocuments, this.annotatedDocuments));
239 | }
240 | }
241 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/utils/RDFUtils.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.utils;
2 |
3 | import org.apache.commons.io.IOUtils;
4 | import org.apache.jena.rdf.model.Model;
5 | import org.apache.jena.rdf.model.ModelFactory;
6 | import org.apache.jena.riot.Lang;
7 | import org.apache.jena.riot.RDFDataMgr;
8 |
9 | import java.nio.charset.Charset;
10 | import java.nio.charset.StandardCharsets;
11 | import java.nio.file.Files;
12 | import java.nio.file.Path;
13 |
14 | /**
15 | * @author Julien Plu on 2019-02-26.
16 | */
17 | public class RDFUtils {
18 | public static boolean isValidRdf(final String content) {
19 | try {
20 | final Model model = ModelFactory.createDefaultModel();
21 |
22 | RDFDataMgr.read(model, IOUtils.toInputStream(content, StandardCharsets.UTF_8), Lang.TURTLE);
23 | } catch (final Exception ex) {
24 | return false;
25 | }
26 |
27 | return true;
28 | }
29 |
30 | public static boolean isValidRdf(final Path content) {
31 | try {
32 | final Model model = ModelFactory.createDefaultModel();
33 |
34 | RDFDataMgr.read(model, Files.newInputStream(content), Lang.TURTLE);
35 | } catch (final Exception ex) {
36 | return false;
37 | }
38 |
39 | return true;
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/utils/ReflectionUtils.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.utils;
2 |
3 | import org.reflections.Reflections;
4 |
5 | import fr.eurecom.adel.commons.validators.Name;
6 |
7 | /**
8 | * @author Julien Plu on 2019-02-09.
9 | */
10 | public class ReflectionUtils {
11 | public static String getClassNameFromMethod(final String method, final String subPackage) {
12 | final Reflections ref = new Reflections("fr.eurecom.adel.recognition.implementation.repositories." + subPackage);
13 | String className = "";
14 |
15 | for (final Class> cl : ref.getTypesAnnotatedWith(Name.class)) {
16 | final Name name = cl.getAnnotation(Name.class);
17 |
18 | if (name.name().equals(method)) {
19 | className = cl.getCanonicalName();
20 | }
21 | }
22 |
23 | return className;
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/utils/ScoringUtils.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.utils;
2 |
3 | import de.vandermeer.asciitable.AsciiTable;
4 |
5 | import org.javatuples.Pair;
6 | import org.javatuples.Triplet;
7 |
8 | import java.util.HashMap;
9 | import java.util.List;
10 | import java.util.Map;
11 |
12 | import fr.eurecom.adel.commons.datatypes.Entity;
13 |
14 | /**
15 | * @author Julien Plu on 2019-02-28.
16 | */
17 | public class ScoringUtils {
18 | public static String scoreAnnotations(final Map> annotations, final String task) {
19 | final AsciiTable at = new AsciiTable();
20 |
21 | at.addRule();
22 | at.addRow(task, "precision", "recall", "f1-score");
23 | at.addRule();
24 |
25 | int totalTP = 0;
26 | int totalFP = 0;
27 | int totalFN = 0;
28 | double macroPrecision = 0.0;
29 | double macroRecall = 0.0;
30 | double macroF1 = 0.0;
31 |
32 | for (final Map.Entry> entry : annotations.entrySet()) {
33 | totalTP += entry.getValue().getValue0();
34 | totalFP += entry.getValue().getValue1();
35 | totalFN += entry.getValue().getValue2();
36 |
37 | final double precision = (entry.getValue().getValue0() / (double) (entry.getValue().getValue0() + entry.getValue().getValue1()));
38 | final double recall = (entry.getValue().getValue0() / (double) (entry.getValue().getValue0() + entry.getValue().getValue2()));
39 |
40 | macroPrecision += precision;
41 | macroRecall += recall;
42 |
43 | final double f1 = 2.0 * (precision * recall) / (precision + recall);
44 |
45 | macroF1 += f1;
46 |
47 | at.addRow(entry.getKey(), Math.round(precision * 10000.0) /10000.0, Math.round(recall * 10000.0) /10000.0, Math.round(f1*10000.0)/10000.0);
48 | at.addRule();
49 | }
50 |
51 | final double totalPrecision = (totalTP / (double) (totalTP + totalFP));
52 | final double totalRecall = (totalTP / (double) (totalTP + totalFN));
53 | final double totalF1 = 2.0 * (totalPrecision * totalRecall) / (totalPrecision + totalRecall);
54 |
55 | at.addRow("micro avg", Math.round(totalPrecision*10000.0)/10000.0, Math.round(totalRecall*10000.0)/10000.0, Math.round(totalF1*10000.0)/10000.0);
56 | at.addRule();
57 | at.addRow("macro avg", Math.round((macroPrecision / annotations.size())*10000.0)/10000.0, Math.round((macroRecall / annotations.size())*10000.0)/10000.0, Math.round((macroF1 / annotations.size())*10000.0)/10000.0);
58 | at.addRule();
59 |
60 | return at.render();
61 | }
62 |
63 | public static String scoreExtractionNIFAndTAC(final Map>> initialDocuments, final Map>> annotatedDocuments) {
64 | final Map> annotations = new HashMap<>();
65 |
66 | for (final Map.Entry>> entry : initialDocuments.entrySet()) {
67 | final List annotatedEntities = annotatedDocuments.get(entry.getKey()).getValue1();
68 |
69 | for (final Entity initialEntity : entry.getValue().getValue1()) {
70 | boolean found = false;
71 |
72 | for (final Entity annotatedEntity : annotatedEntities) {
73 | if (initialEntity.getStartOffset().equals(annotatedEntity.getStartOffset()) && initialEntity.getEndOffset().equals(annotatedEntity.getEndOffset())) {
74 | annotations.computeIfPresent("", (key, val) -> val.setAt0(val.getValue0() + 1));
75 | annotations.computeIfAbsent("", x -> Triplet.with(1, 0, 0));
76 |
77 | found = true;
78 | }
79 | }
80 |
81 | if (!found) {
82 | annotations.computeIfPresent("", (key, val) -> val.setAt2(val.getValue2() + 1));
83 | annotations.computeIfAbsent("", x -> Triplet.with(0, 0, 1));
84 | }
85 | }
86 |
87 | for (final Entity annotatedEntity : annotatedEntities) {
88 | boolean found = false;
89 |
90 | for (final Entity initialEntity : entry.getValue().getValue1()) {
91 | if (initialEntity.getStartOffset().equals(annotatedEntity.getStartOffset()) && initialEntity.getEndOffset().equals(annotatedEntity.getEndOffset())) {
92 | found = true;
93 | }
94 | }
95 |
96 | if (!found) {
97 | annotations.computeIfPresent("", (key, val) -> val.setAt1(val.getValue1() + 1));
98 | annotations.computeIfAbsent("", x -> Triplet.with(0, 1, 0));
99 | }
100 | }
101 | }
102 |
103 | return ScoringUtils.scoreAnnotations(annotations, "Extraction");
104 | }
105 |
106 | public static String scoreNERNIFAndTAC(final Map>> initialDocuments, final Map>> annotatedDocuments) {
107 | final Map> annotations = new HashMap<>();
108 |
109 | for (final Map.Entry>> entry : initialDocuments.entrySet()) {
110 | final List annotatedEntities = annotatedDocuments.get(entry.getKey()).getValue1();
111 |
112 | for (final Entity initialEntity : entry.getValue().getValue1()) {
113 | boolean found = false;
114 |
115 | for (final Entity annotatedEntity : annotatedEntities) {
116 | if (initialEntity.getStartOffset().equals(annotatedEntity.getStartOffset()) && initialEntity.getEndOffset().equals(annotatedEntity.getEndOffset())
117 | && initialEntity.getType().equals(annotatedEntity.getType())) {
118 | annotations.computeIfPresent(initialEntity.getType(), (key, val) -> val.setAt0(val.getValue0() + 1));
119 | annotations.computeIfAbsent(initialEntity.getType(), x -> Triplet.with(1, 0, 0));
120 |
121 | found = true;
122 | }
123 | }
124 |
125 | if (!found) {
126 | annotations.computeIfPresent(initialEntity.getType(), (key, val) -> val.setAt2(val.getValue2() + 1));
127 | annotations.computeIfAbsent(initialEntity.getType(), x -> Triplet.with(0, 0, 1));
128 | }
129 | }
130 |
131 | for (final Entity annotatedEntity : annotatedEntities) {
132 | boolean found = false;
133 |
134 | for (final Entity initialEntity : entry.getValue().getValue1()) {
135 | if (initialEntity.getStartOffset().equals(annotatedEntity.getStartOffset()) && initialEntity.getEndOffset().equals(annotatedEntity.getEndOffset())
136 | && initialEntity.getType().equals(annotatedEntity.getType())) {
137 | found = true;
138 | break;
139 | }
140 | }
141 |
142 | if (!found) {
143 | annotations.computeIfPresent(annotatedEntity.getType(), (key, val) -> val.setAt1(val.getValue1() + 1));
144 | annotations.computeIfAbsent(annotatedEntity.getType(), x -> Triplet.with(0, 1, 0));
145 | }
146 | }
147 | }
148 |
149 | return ScoringUtils.scoreAnnotations(annotations, "Recognition");
150 | }
151 | }
152 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/utils/StringUtils.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.utils;
2 |
3 | import java.text.BreakIterator;
4 |
/**
 * String helpers.
 *
 * @author Julien Plu on 2019-02-26.
 */
public class StringUtils {
  /**
   * Counts the character boundaries that {@link BreakIterator#getCharacterInstance()} finds in the
   * given string, i.e. its length in user-perceived characters for the default locale.
   *
   * @param s the string to measure
   * @return the number of boundaries reached before {@link BreakIterator#DONE}
   */
  public static int printLength(final String s) {
    final BreakIterator boundaries = BreakIterator.getCharacterInstance();

    boundaries.setText(s);

    int length = 0;

    for (int boundary = boundaries.next(); boundary != BreakIterator.DONE; boundary = boundaries.next()) {
      length++;
    }

    return length;
  }
}
23 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/utils/TweetUtils.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.utils;
2 |
3 | import com.twitter.twittertext.Extractor;
4 | import com.vdurmont.emoji.EmojiParser;
5 |
6 | import java.util.ArrayList;
7 | import java.util.List;
8 |
9 | import fr.eurecom.adel.commons.datatypes.TweetEntity;
10 |
11 | /**
12 | * @author Julien Plu on 2019-02-13.
13 | */
14 | public class TweetUtils {
15 | public static List getHashtags(final String tweet) {
16 | final Extractor extractor = new Extractor();
17 | final List tweetEntities = new ArrayList<>();
18 |
19 | for (final var entity : extractor.extractHashtagsWithIndices(tweet)) {
20 | final TweetEntity tweetEntity = TweetEntity.builder().phrase('#' + entity.getValue()).startOffset(entity.getStart()).endOffset(entity.getEnd()).build();
21 |
22 | tweetEntities.add(tweetEntity);
23 | }
24 |
25 | return tweetEntities;
26 | }
27 |
28 | public static List getUserMentions(final String tweet) {
29 | final Extractor extractor = new Extractor();
30 | final List tweetEntities = new ArrayList<>();
31 |
32 | for (final var entity : extractor.extractMentionedScreennamesWithIndices(tweet)) {
33 | final TweetEntity tweetEntity = TweetEntity.builder().phrase('@' + entity.getValue()).startOffset(entity.getStart()).endOffset(entity.getEnd()).build();
34 |
35 | tweetEntities.add(tweetEntity);
36 | }
37 |
38 | return tweetEntities;
39 | }
40 |
41 | public static String removeEmojis(final String tweet) {
42 | return EmojiParser.replaceAllEmojis(tweet, ".");
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/AlreadyExists.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 | /**
13 | * @author Julien Plu on 2019-03-06.
14 | */
15 | @Target({ElementType.METHOD, ElementType.FIELD, ElementType.ANNOTATION_TYPE, ElementType.PARAMETER})
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Constraint(validatedBy = AlreadyExistsValidator.class)
18 | @Documented
19 | public @interface AlreadyExists {
20 | String message() default "{already.exists}";
21 | Class>[] groups() default {};
22 | Class extends Payload>[] payload() default {};
23 | }
24 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/AlreadyExistsValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.nio.file.Files;
4 | import java.nio.file.Paths;
5 |
6 | import javax.validation.ConstraintValidator;
7 | import javax.validation.ConstraintValidatorContext;
8 |
9 | /**
10 | * @author Julien Plu on 2019-03-06.
11 | */
12 | public class AlreadyExistsValidator implements ConstraintValidator {
13 | @Override
14 | public final boolean isValid(final String t, final ConstraintValidatorContext constraintValidatorContext) {
15 | if (t.isEmpty()) {
16 | return true;
17 | }
18 |
19 | return !Files.exists(Paths.get(t));
20 | }
21 | }
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/File.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import javax.validation.Constraint;
4 | import javax.validation.Payload;
5 | import java.lang.annotation.Documented;
6 | import java.lang.annotation.ElementType;
7 | import java.lang.annotation.Retention;
8 | import java.lang.annotation.RetentionPolicy;
9 | import java.lang.annotation.Target;
10 |
11 |
12 | @Target({ElementType.METHOD, ElementType.FIELD, ElementType.ANNOTATION_TYPE, ElementType.PARAMETER})
13 | @Retention(RetentionPolicy.RUNTIME)
14 | @Constraint(validatedBy = FileValidator.class)
15 | @Documented
16 | public @interface File {
17 | String message() default "{file}";
18 | Class>[] groups() default {};
19 | Class extends Payload>[] payload() default {};
20 | }
21 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/FileValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.nio.file.Paths;
4 |
5 | import javax.validation.ConstraintValidator;
6 | import javax.validation.ConstraintValidatorContext;
7 |
8 | /**
9 | * @author Julien Plu on 2019-02-12.
10 | */
11 | public class FileValidator implements ConstraintValidator {
12 | @Override
13 | public final boolean isValid(final String t, final ConstraintValidatorContext constraintValidatorContext) {
14 | if (t.isEmpty()) {
15 | return true;
16 | }
17 |
18 | return Paths.get(t).toFile().isFile();
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/MustExists.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 | /**
13 | * @author Julien Plu on 2019-02-23.
14 | */
15 | @Target({ElementType.METHOD, ElementType.FIELD, ElementType.ANNOTATION_TYPE, ElementType.PARAMETER})
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Constraint(validatedBy = MustExistsValidator.class)
18 | @Documented
19 | public @interface MustExists {
20 | String message() default "{must.exists}";
21 | Class>[] groups() default {};
22 | Class extends Payload>[] payload() default {};
23 | }
24 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/MustExistsValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.nio.file.Files;
4 | import java.nio.file.Paths;
5 |
6 | import javax.validation.ConstraintValidator;
7 | import javax.validation.ConstraintValidatorContext;
8 |
9 | /** @author Julien Plu on 2019-02-23. */
10 | public class MustExistsValidator implements ConstraintValidator {
11 | @Override
12 | public final boolean isValid(final String t, final ConstraintValidatorContext constraintValidatorContext) {
13 | if (t.isEmpty()) {
14 | return true;
15 | }
16 |
17 | return Files.exists(Paths.get(t));
18 | }
19 | }
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/Name.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.lang.annotation.ElementType;
4 | import java.lang.annotation.Retention;
5 | import java.lang.annotation.RetentionPolicy;
6 | import java.lang.annotation.Target;
7 |
/**
 * Runtime-visible marker that gives a type a symbolic name.
 *
 * @author Julien Plu on 2019-02-08.
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface Name {
  /**
   * @return the symbolic name given to the annotated type
   */
  String name();
}
16 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/OneOf.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 | /**
13 | * @author Julien Plu on 2019-02-28.
14 | */
15 | @Target({ElementType.METHOD, ElementType.FIELD, ElementType.ANNOTATION_TYPE, ElementType.CONSTRUCTOR, ElementType.PARAMETER, ElementType.TYPE_USE})
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Documented
18 | @Constraint(validatedBy = OneOfValidator.class)
19 | public @interface OneOf {
20 | String message() default "{one.of}";
21 | Class>[] groups() default {};
22 | Class extends Payload>[] payload() default {};
23 | String[] value();
24 |
25 | /**
26 | * Whether or not to ignore case.
27 | */
28 | boolean ignoreCase() default false;
29 |
30 | /**
31 | * Whether or not to ignore leading and trailing whitespace.
32 | */
33 | boolean ignoreWhitespace() default false;
34 | }
35 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/OneOfValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import javax.validation.ConstraintValidator;
4 | import javax.validation.ConstraintValidatorContext;
5 |
6 | /**
7 | * @author Julien Plu on 2019-02-28.
8 | */
9 | public class OneOfValidator implements ConstraintValidator {
10 | private String[] values = new String[]{};
11 | private boolean caseInsensitive;
12 | private boolean ignoreWhitespace;
13 |
14 | @Override
15 | public final void initialize(final OneOf constraintAnnotation) {
16 | this.values = constraintAnnotation.value();
17 | this.caseInsensitive = constraintAnnotation.ignoreCase();
18 | this.ignoreWhitespace = constraintAnnotation.ignoreWhitespace();
19 | }
20 |
21 | @Override
22 | public final boolean isValid(final Object t, final ConstraintValidatorContext constraintValidatorContext) {
23 | if (null == t) {
24 | return true;
25 | }
26 |
27 | final String v = this.ignoreWhitespace ? t.toString().trim() : t.toString();
28 |
29 | if (this.caseInsensitive) {
30 | for (final String s : this.values) {
31 | if (s.equalsIgnoreCase(v)) {
32 | return true;
33 | }
34 | }
35 | } else {
36 | for (final String s : this.values) {
37 | if (s.equals(v)) {
38 | return true;
39 | }
40 | }
41 | }
42 |
43 | return false;
44 | }
45 | }
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/Readable.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import javax.validation.Constraint;
4 | import javax.validation.Payload;
5 | import java.lang.annotation.Documented;
6 | import java.lang.annotation.ElementType;
7 | import java.lang.annotation.Retention;
8 | import java.lang.annotation.RetentionPolicy;
9 | import java.lang.annotation.Target;
10 |
11 | @Target({ElementType.METHOD, ElementType.FIELD, ElementType.ANNOTATION_TYPE, ElementType.PARAMETER})
12 | @Retention(RetentionPolicy.RUNTIME)
13 | @Constraint(validatedBy = ReadableValidator.class)
14 | @Documented
15 | public @interface Readable {
16 | String message() default "{readable}";
17 | Class>[] groups() default {};
18 | Class extends Payload>[] payload() default {};
19 | }
20 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/ReadableValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.nio.file.Files;
4 | import java.nio.file.Paths;
5 |
6 | import javax.validation.ConstraintValidator;
7 | import javax.validation.ConstraintValidatorContext;
8 |
9 | /**
10 | * @author Julien Plu on 2019-02-12.
11 | */
12 | public class ReadableValidator implements ConstraintValidator {
13 | @Override
14 | public final boolean isValid(final String t, final ConstraintValidatorContext constraintValidatorContext) {
15 | if (t.isEmpty()) {
16 | return true;
17 | }
18 |
19 | if (!t.startsWith(".") && !t.startsWith("/") && !t.startsWith("~")) {
20 | return Files.isReadable(Paths.get("./" + t).getParent());
21 | }
22 |
23 | return Files.isReadable(Paths.get(t).getParent());
24 | }
25 | }
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/URL.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 |
13 | /**
14 | * @author Julien Plu on 2019-02-09.
15 | */
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Target({ElementType.METHOD, ElementType.FIELD, ElementType.ANNOTATION_TYPE, ElementType.CONSTRUCTOR, ElementType.PARAMETER, ElementType.TYPE_USE})
18 | @Constraint(validatedBy = URLValidator.class)
19 | @Documented
20 | public @interface URL {
21 | String message() default "{URL}";
22 | Class>[] groups() default { };
23 | Class extends Payload>[] payload() default { };
24 | }
25 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/URLValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import org.apache.jena.ext.xerces.util.URI;
4 |
5 | import java.io.IOException;
6 | import java.io.InputStreamReader;
7 | import java.net.HttpURLConnection;
8 | import java.net.MalformedURLException;
9 | import java.net.URISyntaxException;
10 | import java.nio.charset.StandardCharsets;
11 | import java.util.Objects;
12 |
13 | import javax.validation.ConstraintValidator;
14 | import javax.validation.ConstraintValidatorContext;
15 |
16 | /**
17 | * @author Julien Plu on 2019-02-09.
18 | */
19 | public class URLValidator implements ConstraintValidator {
20 | @Override
21 | public boolean isValid(final String t, final ConstraintValidatorContext constraintValidatorContext) {
22 | if (null == t || t.isEmpty()) {
23 | return true;
24 | }
25 |
26 | if (t.startsWith("classpath")) {
27 | return this.loadProperties(t.replace("classpath:", ""), Thread.currentThread().getContextClassLoader());
28 | }
29 |
30 | try {
31 | new java.net.URL(t).toURI();
32 | } catch (final MalformedURLException | URISyntaxException ex) {
33 | return false;
34 | }
35 |
36 | return true;
37 | }
38 |
39 | private boolean loadProperties(final String file, final ClassLoader loader) {
40 | String name = file;
41 |
42 | if (name.endsWith(".properties")) {
43 | name = name.substring(0, name.length() - ".properties".length());
44 | }
45 |
46 | name = name.replace('.', '/');
47 | name += ".properties";
48 |
49 | try (final InputStreamReader reader = new InputStreamReader(Objects.requireNonNull(loader.getResourceAsStream(name)), StandardCharsets.UTF_8)) {
50 | return true;
51 | } catch (final Exception var8) {
52 | return false;
53 | }
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/Writable.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import javax.validation.Constraint;
4 | import javax.validation.Payload;
5 | import java.lang.annotation.Documented;
6 | import java.lang.annotation.ElementType;
7 | import java.lang.annotation.Retention;
8 | import java.lang.annotation.RetentionPolicy;
9 | import java.lang.annotation.Target;
10 |
11 | @Target({ElementType.METHOD, ElementType.FIELD, ElementType.ANNOTATION_TYPE, ElementType.PARAMETER})
12 | @Retention(RetentionPolicy.RUNTIME)
13 | @Constraint(validatedBy = WritableValidator.class)
14 | @Documented
15 | public @interface Writable {
16 | String message() default "{writable}";
17 | Class>[] groups() default {};
18 | Class extends Payload>[] payload() default {};
19 | }
20 |
--------------------------------------------------------------------------------
/adel-commons/src/main/java/fr/eurecom/adel/commons/validators/WritableValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.commons.validators;
2 |
3 | import java.nio.file.Files;
4 | import java.nio.file.Paths;
5 |
6 | import javax.validation.ConstraintValidator;
7 | import javax.validation.ConstraintValidatorContext;
8 |
9 | /**
10 | * @author Julien Plu on 2019-02-12.
11 | */
12 | public class WritableValidator implements ConstraintValidator {
13 | @Override
14 | public final boolean isValid(final String t, final ConstraintValidatorContext constraintValidatorContext) {
15 | if (t.isEmpty()) {
16 | return true;
17 | }
18 |
19 | if ("/".equals(t)) {
20 | return false;
21 | }
22 |
23 | if (!t.startsWith(".") && !t.startsWith("/") && !t.startsWith("~")) {
24 | return Files.isWritable(Paths.get("./" + t).getParent());
25 | }
26 |
27 | return Files.isWritable(Paths.get(t).getParent());
28 | }
29 | }
--------------------------------------------------------------------------------
/adel-commons/src/main/resources/ValidationMessages.properties:
--------------------------------------------------------------------------------
1 | propertylist.size=The number of annotators in the priority list is different from the declared annotators.
2 | propertylist.content=Values in '${validatedValue.annotatorsName()}' and in '${validatedValue.getPriority()}' are not the same
3 | has.tokenizer=At least one of the annotators must be designated as tokenizer
4 | name.exists=One of the following names '${validatedValue.implementationNames()}' does not exist as an implementation
5 | file=Must be a file
6 | readable=Must be readable
7 | writable=Must be writable
8 | already.exists=Must not already exist
9 | URL=This URL is invalid
10 | unique.name=The name of each annotator must be unique: '${validatedValue.annotatorsName()}'
11 | one.of=Must be one of {value}
12 | must.exists=Must exist
--------------------------------------------------------------------------------
/adel-commons/src/main/resources/ValidationMessages_fr.properties:
--------------------------------------------------------------------------------
1 | propertylist.size=Le nombre d'annotateurs dans la liste de priorité est différent du nombre d'annotateurs déclaré
2 | propertylist.content=Les valeurs dans '${validatedValue.annotatorsName()}' et dans '${validatedValue.getPriority()}' ne sont pas les mêmes
3 | has.tokenizer=Au moins un des annotators doit être désigné comme tokenizer
4 | name.exists=Un des noms suivant '${validatedValue.implementationNames()}' n'existe pas comme implémentation
5 | file=Doit être un fichier
6 | readable=Doit être lisible
7 | writable=Doit être inscriptible
8 | already.exists=Ne doit pas déjà exister
9 | URL=Cette URL est non valide
10 | unique.name=Le nom de chaque annotator doit être unique: '${validatedValue.annotatorsName()}'
11 | one.of=Doit être un de {value}
12 | must.exists=Doit exister
--------------------------------------------------------------------------------
/adel-commons/src/main/resources/banner.txt:
--------------------------------------------------------------------------------
1 | o o__ __o o__ __o__/_ o
2 | <|> <| v\ <| v <|>
3 | / \ / \ <\ < > / \
4 | o/ \o \o/ \o | \o/
5 | <|__ __|> | |> o__/_ |
6 | / \ / \ // | / \
7 | o/ \o \o/ / \o/
8 | /v v\ | o | |
9 | /> <\ / \ __/> / \ _\o__/_ / \ _\o__/_
10 | --------------------------------------------------------------------------
11 |
--------------------------------------------------------------------------------
/adel-config-server/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 | adel-config-server
7 | ${revision}
8 | adel-config-server
9 | http://adel.eurecom.fr
10 |
11 |
12 | fr.eurecom.adel
13 | adel-pom
14 | ${revision}
15 |
16 |
17 |
18 |
19 | org.springframework.cloud
20 | spring-cloud-config-server
21 | ${spring.cloud.config.server.version}
22 |
23 |
24 |
25 |
26 |
27 |
28 | org.springframework.boot
29 | spring-boot-maven-plugin
30 |
31 |
32 |
33 | io.fabric8
34 | docker-maven-plugin
35 | ${docker.maven.plugin.version}
36 |
37 |
38 |
39 | ${project.artifactId}
40 | jplu/${project.artifactId}:${revision}
41 |
42 | openjdk:oracle
43 | Julien Plu
44 |
45 | docker-assembly.xml
46 |
47 | /maven
48 |
49 | java -jar ${project.build.finalName}.jar
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 | org.apache.maven.plugins
58 | maven-compiler-plugin
59 |
60 | 13
61 | 13
62 |
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/adel-config-server/src/main/docker/docker-assembly.xml:
--------------------------------------------------------------------------------
1 |
3 | ${project.artifactId}
4 |
5 |
6 | target${file.separator}${project.build.finalName}.jar
7 | ${file.separator}
8 |
9 |
10 |
--------------------------------------------------------------------------------
/adel-config-server/src/main/java/fr/eurecom/adel/config/server/ADELConfigServer.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.config.server;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | import org.springframework.cloud.config.server.EnableConfigServer;
6 |
7 | /**
8 | * @author Julien Plu on 2019-03-21.
9 | */
10 | @EnableConfigServer
11 | @SpringBootApplication
12 | public class ADELConfigServer {
13 | public static void main(final String... args) {
14 | SpringApplication.run(ADELConfigServer.class, args);
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/adel-config-server/src/main/resources/bootstrap.yaml:
--------------------------------------------------------------------------------
# Port the configuration server listens on.
server.port: 8888
spring:
  cloud:
    config:
      server:
        git:
          # Git repository the served configuration is read from.
          uri: https://github.com/jplu/ADEL-config
--------------------------------------------------------------------------------
/adel-discovery-server/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 | adel-discovery-server
7 | ${revision}
8 | adel-discovery-server
9 | http://adel.eurecom.fr
10 |
11 |
12 | fr.eurecom.adel
13 | adel-pom
14 | ${revision}
15 |
16 |
17 |
18 |
19 |
20 | org.springframework.cloud
21 | spring-cloud-starter-config
22 | ${spring.cloud.starter.config.version}
23 |
24 |
25 |
26 | org.springframework.cloud
27 | spring-cloud-starter-netflix-eureka-server
28 | ${spring.cloud.starter.netflix.eureka.server.version}
29 |
30 |
31 |
32 | org.springframework.boot
33 | spring-boot-starter-aop
34 |
35 |
36 |
37 | org.springframework.retry
38 | spring-retry
39 | ${spring.retry.version}
40 |
41 |
42 |
43 | javax.xml.bind
44 | jaxb-api
45 | ${jaxb.api.version}
46 |
47 |
48 |
49 | com.sun.xml.bind
50 | jaxb-core
51 | ${jaxb.core.version}
52 |
53 |
54 |
55 | com.sun.xml.bind
56 | jaxb-impl
57 | ${jaxb.impl.version}
58 |
59 |
60 |
61 | javax.activation
62 | activation
63 | ${activation.version}
64 |
65 |
66 |
67 | javax.validation
68 | validation-api
69 | ${validation.api.version}
70 |
71 |
72 |
73 |
74 |
75 |
76 | org.springframework.boot
77 | spring-boot-maven-plugin
78 |
79 |
80 |
81 | io.fabric8
82 | docker-maven-plugin
83 | ${docker.maven.plugin.version}
84 |
85 |
86 |
87 | ${project.artifactId}
88 | jplu/${project.artifactId}:${revision}
89 |
90 | openjdk:oracle
91 | Julien Plu
92 |
93 | docker-assembly.xml
94 |
95 | /maven
96 |
97 | java -jar ${project.build.finalName}.jar
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 | org.apache.maven.plugins
106 | maven-compiler-plugin
107 |
108 | 13
109 | 13
110 |
111 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/adel-discovery-server/src/main/docker/docker-assembly.xml:
--------------------------------------------------------------------------------
1 |
3 | ${project.artifactId}
4 |
5 |
6 | target${file.separator}${project.build.finalName}.jar
7 | ${file.separator}
8 |
9 |
10 |
--------------------------------------------------------------------------------
/adel-discovery-server/src/main/java/fr/eurecom/adel/discovery/server/ADELDiscoveryServer.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.discovery.server;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | import org.springframework.cloud.netflix.eureka.server.EnableEurekaServer;
6 |
7 | /**
8 | * @author Julien Plu on 2019-03-21.
9 | */
10 | @EnableEurekaServer
11 | @SpringBootApplication
12 | public class ADELDiscoveryServer {
13 | public static void main(String[] args) {
14 | SpringApplication.run(ADELDiscoveryServer.class, args);
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/adel-discovery-server/src/main/resources/bootstrap.yaml:
--------------------------------------------------------------------------------
spring:
  cloud:
    config:
      # Location of the configuration server; overridable through CONFIG_URI.
      uri: ${CONFIG_URI:http://localhost:8888}
      # Abort start-up (after the retries below) when the configuration cannot be fetched.
      failFast: true
      retry:
        initialInterval: 3000
        multiplier: 1.3
        maxInterval: 5000
        maxAttempts: 20
  application:
    name: discovery-server
--------------------------------------------------------------------------------
/adel-hystrix-dashboard/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 | adel-hystrix-dashboard
7 | ${revision}
8 | adel-hystrix-dashboard
9 | http://adel.eurecom.fr
10 |
11 |
12 | fr.eurecom.adel
13 | adel-pom
14 | ${revision}
15 |
16 |
17 |
18 |
19 | org.springframework.cloud
20 | spring-cloud-starter-config
21 | ${spring.cloud.starter.config.version}
22 |
23 |
24 |
25 | org.springframework.cloud
26 | spring-cloud-starter-netflix-hystrix-dashboard
27 | ${spring.cloud.starter.netflix.hystrix.dashboard.version}
28 |
29 |
30 |
31 | org.springframework.boot
32 | spring-boot-starter-aop
33 |
34 |
35 |
36 | org.springframework.retry
37 | spring-retry
38 | ${spring.retry.version}
39 |
40 |
41 |
42 |
43 |
44 |
45 | org.springframework.boot
46 | spring-boot-maven-plugin
47 |
48 |
49 |
50 | io.fabric8
51 | docker-maven-plugin
52 | ${docker.maven.plugin.version}
53 |
54 |
55 |
56 | ${project.artifactId}
57 | jplu/${project.artifactId}:${revision}
58 |
59 | openjdk:oracle
60 | Julien Plu
61 |
62 | docker-assembly.xml
63 |
64 | /maven
65 |
66 | java -jar ${project.build.finalName}.jar
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 | org.apache.maven.plugins
75 | maven-compiler-plugin
76 |
77 | 13
78 | 13
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/adel-hystrix-dashboard/src/main/docker/docker-assembly.xml:
--------------------------------------------------------------------------------
1 |
3 | ${project.artifactId}
4 |
5 |
6 | target${file.separator}${project.build.finalName}.jar
7 | ${file.separator}
8 |
9 |
10 |
--------------------------------------------------------------------------------
/adel-hystrix-dashboard/src/main/java/fr/eurecom/adel/hystrix/dashboard/ADELHystrixDashboard.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.hystrix.dashboard;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | import org.springframework.cloud.netflix.hystrix.dashboard.EnableHystrixDashboard;
6 | import org.springframework.stereotype.Controller;
7 | import org.springframework.web.bind.annotation.RequestMapping;
8 |
9 | /**
10 | * @author Julien Plu on 2019-04-02.
11 | */
12 | @SpringBootApplication
13 | @EnableHystrixDashboard
14 | @Controller
15 | public class ADELHystrixDashboard {
16 | public static void main(final String... args) {
17 | SpringApplication.run(ADELHystrixDashboard.class, args);
18 | }
19 |
20 | @RequestMapping("/")
21 | public String home() {
22 | return "forward:/hystrix";
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/adel-hystrix-dashboard/src/main/resources/bootstrap.yaml:
--------------------------------------------------------------------------------
spring:
  cloud:
    config:
      # Location of the configuration server; overridable through CONFIG_URI.
      uri: ${CONFIG_URI:http://localhost:8888}
      # Abort start-up (after the retries below) when the configuration cannot be fetched.
      failFast: true
      retry:
        initialInterval: 3000
        multiplier: 1.3
        maxInterval: 5000
        maxAttempts: 20
  application:
    name: hystrix-dashboard
13 |
--------------------------------------------------------------------------------
/adel-indexing/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | adel-indexing
6 | ${revision}
7 |
8 |
9 |
10 | org.apache.maven.plugins
11 | maven-compiler-plugin
12 |
13 | 13
14 | 13
15 |
16 |
17 |
18 |
19 | adel-indexing
20 | http://adel.eurecom.fr
21 |
22 |
23 | fr.eurecom.adel
24 | adel-pom
25 | ${revision}
26 |
27 |
28 |
--------------------------------------------------------------------------------
/adel-indexing/src/main/java/fr/eurecom/adel/indexing/App.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.indexing;
2 |
/**
 * Placeholder entry point of the indexing module; it only prints a greeting.
 */
public class App {
    /**
     * Prints {@code Hello World!} to standard output.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        final String greeting = "Hello World!";

        System.out.println(greeting);
    }
}
14 |
--------------------------------------------------------------------------------
/adel-linking/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | adel-linking
6 | ${revision}
7 |
8 |
9 |
10 | org.apache.maven.plugins
11 | maven-compiler-plugin
12 |
13 | 13
14 | 13
15 |
16 |
17 |
18 |
19 | adel-linking
20 | http://adel.eurecom.fr
21 |
22 |
23 | fr.eurecom.adel
24 | adel-pom
25 | ${revision}
26 |
27 |
28 |
--------------------------------------------------------------------------------
/adel-linking/src/main/java/fr/eurecom/adel/linking/App.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.linking;
2 |
/**
 * Placeholder entry point of the linking module; it only prints a greeting.
 */
public class App {
    /**
     * Prints {@code Hello World!} to standard output.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        final String greeting = "Hello World!";

        System.out.println(greeting);
    }
}
14 |
--------------------------------------------------------------------------------
/adel-recognition/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | adel-recognition
6 | ${revision}
7 | adel-recognition
8 | http://adel.eurecom.fr
9 |
10 |
11 | fr.eurecom.adel
12 | adel-pom
13 | ${revision}
14 |
15 |
16 |
17 |
18 | fr.eurecom.adel
19 | adel-commons
20 | ${revision}
21 |
22 |
23 |
24 | org.projectlombok
25 | lombok
26 | ${lombok.version}
27 | provided
28 |
29 |
30 |
31 | org.junit.jupiter
32 | junit-jupiter-engine
33 | ${junit.jupiter.engine.version}
34 | test
35 |
36 |
37 |
38 | org.junit.jupiter
39 | junit-jupiter-api
40 | ${junit.jupiter.api.version}
41 | test
42 |
43 |
44 |
45 | org.junit.platform
46 | junit-platform-commons
47 | ${junit.platform.commons.version}
48 | test
49 |
50 |
51 |
52 | org.junit.platform
53 | junit-platform-engine
54 | ${junit.platform.engine.version}
55 | test
56 |
57 |
58 |
59 | edu.stanford.nlp
60 | stanford-corenlp
61 | ${stanford.corenlp.version}
62 |
63 |
64 |
65 | edu.stanford.nlp
66 | stanford-corenlp
67 | ${stanford.corenlp.version}
68 | models-english
69 |
70 |
71 |
72 | com.google.guava
73 | guava
74 | ${guava.version}
75 |
76 |
77 |
78 | org.jsoup
79 | jsoup
80 | ${jsoup.version}
81 |
82 |
83 |
84 |
85 |
86 |
87 | org.apache.maven.plugins
88 | maven-surefire-plugin
89 | ${maven-surefire-plugin.version}
90 |
91 |
92 | org.apache.maven.plugins
93 | maven-compiler-plugin
94 |
95 | 13
96 | 13
97 |
98 |
99 |
100 |
101 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/RecognitionSetup.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition;
2 |
3 | import org.springframework.beans.factory.annotation.Autowired;
4 | import org.springframework.context.annotation.Bean;
5 | import org.springframework.context.annotation.Configuration;
6 |
7 | import java.lang.reflect.Constructor;
8 | import java.lang.reflect.InvocationTargetException;
9 | import java.util.ArrayList;
10 | import java.util.HashMap;
11 | import java.util.List;
12 | import java.util.Map;
13 |
14 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
15 | import fr.eurecom.adel.recognition.configuration.RecognitionConfig;
16 | import fr.eurecom.adel.commons.utils.ReflectionUtils;
17 | import fr.eurecom.adel.recognition.domain.repositories.AnnotatorRepository;
18 | import fr.eurecom.adel.recognition.domain.repositories.HashtagSegmentationRepository;
19 | import fr.eurecom.adel.recognition.domain.repositories.MentionOverlapResolutionRepository;
20 | import fr.eurecom.adel.recognition.domain.repositories.TypeOverlapResolutionRepository;
21 | import fr.eurecom.adel.recognition.domain.repositories.UserMentionDereferencingRepository;
22 | import fr.eurecom.adel.recognition.usecases.Annotator;
23 | import fr.eurecom.adel.recognition.usecases.OverlapResolution;
24 | import fr.eurecom.adel.recognition.usecases.RecognitionPipeline;
25 | import fr.eurecom.adel.recognition.usecases.TweetNormalization;
26 |
27 | /**
28 | * @author Julien Plu on 2018-11-25.
29 | */
30 | @Configuration
31 | public class RecognitionSetup {
32 | private RecognitionConfig recognitionConfig;
33 |
34 | @Autowired
35 | public final void setRecognitionConfig(final RecognitionConfig newRecognitionConfig) {
36 | this.recognitionConfig = newRecognitionConfig;
37 | }
38 |
39 | @Bean
40 | public RecognitionPipeline init() throws ClassNotFoundException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
41 | final Map annotatorsRepository = new HashMap<>();
42 | Constructor constructor;
43 |
44 | for (final AnnotatorConfig conf : this.recognitionConfig.getAnnotators()) {
45 | constructor = Class.forName(ReflectionUtils.getClassNameFromMethod(conf.getAnnotator(), "annotator")).getConstructor(String.class);
46 |
47 | final AnnotatorRepository annotatorRepository = (AnnotatorRepository) constructor.newInstance(conf.getAddress());
48 |
49 | annotatorsRepository.put(annotatorRepository, conf);
50 | }
51 |
52 | constructor = Class.forName(ReflectionUtils.getClassNameFromMethod(this.recognitionConfig.getTypeoverlapping().getMethod(), "typeoverlapresolution")).getConstructor();
53 |
54 | final TypeOverlapResolutionRepository typeOverlapResolutionRepository = (TypeOverlapResolutionRepository) constructor.newInstance();
55 |
56 | final List annotators = new ArrayList<>();
57 |
58 | for (final Map.Entry entry : annotatorsRepository.entrySet()) {
59 | annotators.add(new Annotator(entry.getKey(), entry.getValue()));
60 | }
61 |
62 | constructor = Class.forName(ReflectionUtils.getClassNameFromMethod(this.recognitionConfig.getMentionoverlapping(), "mentionoverlapresolution")).getConstructor();
63 |
64 | final MentionOverlapResolutionRepository mentionOverlapResolutionRepository = (MentionOverlapResolutionRepository) constructor.newInstance();
65 | int indexTokenizer = -1;
66 |
67 | for (final Annotator annotator : annotators) {
68 | if (annotator.getConfig().getTokenizer()) {
69 | indexTokenizer = annotators.indexOf(annotator);
70 |
71 | break;
72 | }
73 | }
74 |
75 | if (this.recognitionConfig.getTweetnormalization().getActivate()) {
76 | constructor = Class.forName(ReflectionUtils.getClassNameFromMethod(this.recognitionConfig.getTweetnormalization().getUsermention(), "usermentiondereferencing")).getConstructor();
77 |
78 | final UserMentionDereferencingRepository userMentionDereferencingRepository = (UserMentionDereferencingRepository) constructor.newInstance();
79 |
80 | constructor = Class.forName(ReflectionUtils.getClassNameFromMethod(this.recognitionConfig.getTweetnormalization().getHashtag(), "hashtagsegmentation")).getConstructor();
81 |
82 | final HashtagSegmentationRepository hashtagSegmentationRepository = (HashtagSegmentationRepository) constructor.newInstance();
83 |
84 | return new RecognitionPipeline(annotators, new OverlapResolution(typeOverlapResolutionRepository, this.recognitionConfig.getTypeoverlapping(), mentionOverlapResolutionRepository), new TweetNormalization(hashtagSegmentationRepository, userMentionDereferencingRepository), indexTokenizer, this.recognitionConfig);
85 | }
86 |
87 | return new RecognitionPipeline(annotators, new OverlapResolution(typeOverlapResolutionRepository, this.recognitionConfig.getTypeoverlapping(), mentionOverlapResolutionRepository), null, indexTokenizer, this.recognitionConfig);
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/configuration/AnnotatorConfig.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.configuration;
2 |
3 | import org.hibernate.validator.constraints.UniqueElements;
4 |
5 | import java.util.List;
6 |
7 | import javax.validation.constraints.NotBlank;
8 | import javax.validation.constraints.NotNull;
9 |
10 | import fr.eurecom.adel.commons.validators.OneOf;
11 | import fr.eurecom.adel.commons.validators.URL;
12 | import lombok.Getter;
13 | import lombok.Setter;
14 |
15 | /**
16 | * @author Julien Plu on 2018-11-25.
17 | */
18 | @Getter
19 | @Setter
20 | public class AnnotatorConfig {
21 | private @NotBlank @URL String address;
22 | private @NotBlank String annotator;
23 | private @NotBlank @OneOf({"NEEL", "CoNLL", "DUL", "Musicbrainz", "DBpedia", "MUC"}) String from;
24 | private @UniqueElements List tags;
25 | private @NotBlank String name;
26 | private @NotNull Boolean tokenizer;
27 | }
28 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/configuration/RecognitionConfig.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.configuration;
2 |
3 | import org.springframework.boot.context.properties.ConfigurationProperties;
4 | import org.springframework.stereotype.Component;
5 | import org.springframework.validation.annotation.Validated;
6 |
7 | import java.util.ArrayList;
8 | import java.util.Collection;
9 | import java.util.List;
10 | import java.util.stream.Collectors;
11 |
12 | import javax.validation.Valid;
13 | import javax.validation.constraints.NotEmpty;
14 | import javax.validation.constraints.NotNull;
15 |
16 | import fr.eurecom.adel.recognition.validators.ContentPriorityList;
17 | import fr.eurecom.adel.recognition.validators.HasTokenizer;
18 | import fr.eurecom.adel.recognition.validators.NameExistsForRecognition;
19 | import fr.eurecom.adel.recognition.validators.SizePriorityList;
20 | import fr.eurecom.adel.recognition.validators.UniqueName;
21 | import lombok.Getter;
22 | import lombok.Setter;
23 |
24 | @ConfigurationProperties("recognition")
25 | @Validated
26 | @Getter
27 | @Setter
28 | @SizePriorityList
29 | @ContentPriorityList
30 | @Component
31 | @NameExistsForRecognition
32 | public class RecognitionConfig {
33 | private @UniqueName @HasTokenizer @NotEmpty List<@Valid AnnotatorConfig> annotators = new ArrayList<>();
34 | private @NotNull @Valid TypeOverlappingConfig typeoverlapping;
35 | private @Valid TweetNormalizationConfig tweetnormalization;
36 | private @NotEmpty String mentionoverlapping;
37 |
38 | // Used in ValidationMessages.properties
39 | public final String implementationNames() {
40 | final Collection names = new ArrayList<>();
41 |
42 | for (final AnnotatorConfig annotatorConfig : this.annotators) {
43 | names.add(annotatorConfig.getAnnotator());
44 | }
45 |
46 | names.add(this.typeoverlapping.getMethod());
47 | names.add(this.mentionoverlapping);
48 | names.add(this.tweetnormalization.getUsermention());
49 | names.add(this.tweetnormalization.getHashtag());
50 |
51 | return names.toString();
52 | }
53 |
54 | // Used in ValidationMessages.properties
55 | public final String getPriority() {
56 | return this.typeoverlapping.getPriority().toString();
57 | }
58 |
59 | // Used in ValidationMessages.properties
60 | public final String annotatorsName() {
61 | return this.annotators.stream().map(AnnotatorConfig::getName).collect(Collectors.toList()).toString();
62 | }
63 | }
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/configuration/TweetNormalizationConfig.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.configuration;
2 |
3 | import javax.validation.constraints.NotNull;
4 |
5 | import lombok.Getter;
6 | import lombok.Setter;
7 |
8 | /**
9 | * @author Julien Plu on 2018-12-09.
10 | */
11 | @Getter
12 | @Setter
13 | public class TweetNormalizationConfig {
14 | private @NotNull String usermention;
15 | private @NotNull String hashtag;
16 | private @NotNull Boolean activate;
17 | }
18 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/configuration/TypeOverlappingConfig.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.configuration;
2 |
3 | import org.hibernate.validator.constraints.UniqueElements;
4 |
5 | import java.util.List;
6 |
7 | import javax.validation.constraints.NotBlank;
8 | import javax.validation.constraints.NotEmpty;
9 |
10 | import fr.eurecom.adel.commons.validators.OneOf;
11 | import lombok.Getter;
12 | import lombok.Setter;
13 |
14 | /**
15 | * @author Julien Plu on 2018-12-08.
16 | */
17 | @Getter
18 | @Setter
19 | public class TypeOverlappingConfig {
20 | private @NotBlank @OneOf({"NEEL", "CoNLL", "DUL", "Musicbrainz", "DBpedia", "MUC"}) String to;
21 | private @UniqueElements @NotEmpty List priority;
22 | private @NotBlank String method;
23 | }
24 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/domain/repositories/AnnotatorRepository.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.domain.repositories;
2 |
3 | import java.util.List;
4 |
5 | import fr.eurecom.adel.commons.datatypes.Entity;
6 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
7 | import fr.eurecom.adel.commons.datatypes.Token;
8 |
9 | /**
10 | * @author Julien Plu on 17/11/2018.
11 | */
12 | public interface AnnotatorRepository {
13 | List annotate(AnnotatorConfig config, String text);
14 | List> tokenize(String text);
15 | }
16 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/domain/repositories/HashtagSegmentationRepository.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.domain.repositories;
2 |
/**
 * Contract for a strategy turning a hashtag into a segmented string.
 *
 * @author Julien Plu on 2018-12-09.
 */
@FunctionalInterface
public interface HashtagSegmentationRepository {
  /**
   * Segments a hashtag.
   *
   * @param hashtag hashtag to segment
   * @return the segmented form of the hashtag
   */
  String segment(String hashtag);
}
10 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/domain/repositories/MentionOverlapResolutionRepository.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.domain.repositories;
2 |
3 | import java.util.List;
4 | import java.util.Map;
5 |
6 | import fr.eurecom.adel.commons.datatypes.Entity;
7 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
8 |
9 | /**
10 | * @author Julien Plu on 2018-12-17.
11 | */
12 | @FunctionalInterface
13 | public interface MentionOverlapResolutionRepository {
14 | List resolveMentionOverlapping(Map> documents);
15 | }
16 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/domain/repositories/TypeOverlapResolutionRepository.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.domain.repositories;
2 |
3 | import java.util.List;
4 |
5 | import fr.eurecom.adel.commons.datatypes.Entity;
6 | import fr.eurecom.adel.recognition.configuration.TypeOverlappingConfig;
7 | import fr.eurecom.adel.recognition.exceptions.MappingNotExistsException;
8 | import fr.eurecom.adel.recognition.exceptions.TypeNotExistsException;
9 |
10 | /**
11 | * @author Julien Plu on 2018-11-26.
12 | */
13 | @FunctionalInterface
14 | public interface TypeOverlapResolutionRepository {
15 | void resolveTypeOverlapping(TypeOverlappingConfig config, List entities) throws MappingNotExistsException, TypeNotExistsException;
16 | }
17 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/domain/repositories/UserMentionDereferencingRepository.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.domain.repositories;
2 |
/**
 * Contract for a strategy turning a user mention into another string.
 *
 * @author Julien Plu on 2018-12-09.
 */
@FunctionalInterface
public interface UserMentionDereferencingRepository {
  /**
   * Dereferences a user mention.
   *
   * @param userMention user mention to dereference
   * @return the dereferenced form of the user mention
   */
  String dereference(String userMention);
}
10 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/exceptions/MappingNotExistsException.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.exceptions;
2 |
/**
 * Checked exception signalling that a mapping does not exist.
 *
 * @author Julien Plu on 2019-03-13.
 */
public class MappingNotExistsException extends Exception {
  private static final long serialVersionUID = -1638450642016819040L;

  /**
   * Creates the exception with a message and the underlying cause.
   *
   * @param errorMessage description of the problem
   * @param err          cause of the problem
   */
  public MappingNotExistsException(final String errorMessage, final Throwable err) {
    super(errorMessage, err);
  }

  /**
   * Creates the exception with a message only.
   *
   * @param errorMessage description of the problem
   */
  public MappingNotExistsException(final String errorMessage) {
    super(errorMessage);
  }
}
17 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/exceptions/TypeNotExistsException.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.exceptions;
2 |
/**
 * Checked exception signalling that a type does not exist.
 *
 * @author Julien Plu on 2019-03-13.
 */
public class TypeNotExistsException extends Exception {
  private static final long serialVersionUID = -2101902359093722753L;

  /**
   * Creates the exception with a message and the underlying cause.
   *
   * @param errorMessage description of the problem
   * @param err          cause of the problem
   */
  public TypeNotExistsException(final String errorMessage, final Throwable err) {
    super(errorMessage, err);
  }

  /**
   * Creates the exception with a message only.
   *
   * @param errorMessage description of the problem
   */
  public TypeNotExistsException(final String errorMessage) {
    super(errorMessage);
  }
}
17 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/implementation/repositories/annotator/jsonapi/JSONAPIAnnotatorRepository.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.implementation.repositories.annotator.jsonapi;
2 |
3 | import org.apache.commons.io.IOUtils;
4 | import org.apache.http.HttpEntity;
5 | import org.apache.http.HttpResponse;
6 | import org.apache.http.client.HttpClient;
7 | import org.apache.http.client.methods.HttpPost;
8 | import org.apache.http.entity.ContentType;
9 | import org.apache.http.entity.StringEntity;
10 | import org.apache.http.impl.client.HttpClientBuilder;
11 | import org.json.JSONArray;
12 | import org.json.JSONObject;
13 | import org.slf4j.Logger;
14 | import org.slf4j.LoggerFactory;
15 |
16 | import java.io.IOException;
17 | import java.nio.charset.StandardCharsets;
18 | import java.util.ArrayList;
19 | import java.util.List;
20 |
21 | import fr.eurecom.adel.commons.datatypes.Entity;
22 | import fr.eurecom.adel.commons.datatypes.Token;
23 | import fr.eurecom.adel.commons.validators.Name;
24 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
25 | import fr.eurecom.adel.recognition.domain.repositories.AnnotatorRepository;
26 |
27 | /**
28 | * @author Julien Plu on 05/06/19.
29 | */
30 | @Name(name = "JSONAPI")
31 | public class JSONAPIAnnotatorRepository implements AnnotatorRepository {
32 | private static final Logger logger = LoggerFactory.getLogger(JSONAPIAnnotatorRepository.class);
33 | private final String recognizeAddress;
34 | private final String tokenizeAddress;
35 |
36 | public JSONAPIAnnotatorRepository(final String newAddress) {
37 | this.recognizeAddress = newAddress + "recognize";
38 | this.tokenizeAddress = newAddress + "tokenize";
39 | }
40 |
41 | @Override
42 | public final List annotate(final AnnotatorConfig config, final String text) {
43 | final HttpClient httpClient = HttpClientBuilder.create().build();
44 | final List recognizedEntities = new ArrayList<>();
45 |
46 | try {
47 | final HttpPost post = new HttpPost(this.recognizeAddress);
48 | final String inputJson = "{\"text\": \"" + text + "\"}";
49 | final HttpEntity requestEntity = new StringEntity(inputJson, ContentType.APPLICATION_JSON);
50 |
51 | post.setEntity(requestEntity);
52 |
53 | final HttpResponse response = httpClient.execute(post);
54 |
55 | if (null != response.getEntity().getContent()) {
56 | final String answer = IOUtils.toString(response.getEntity().getContent(),
57 | StandardCharsets.UTF_8);
58 | final JSONObject json = new JSONObject(answer);
59 | final JSONArray entities = json.getJSONArray("entities");
60 |
61 |
62 | for (int i = 0; i < entities.length(); i++) {
63 | final JSONObject entity = (JSONObject) entities.get(i);
64 |
65 | recognizedEntities.add(Entity.builder()
66 | .phrase(entity.getString("phrase"))
67 | .cleanPhrase(entity.getString("phrase"))
68 | .type(entity.getString("type"))
69 | .startOffset(entity.getInt("startOffset"))
70 | .endOffset(entity.getInt("endOffset"))
71 | .build());
72 | }
73 | }
74 | } catch (final IOException ex) {
75 | JSONAPIAnnotatorRepository.logger.error("Issue to connect to {}", this.recognizeAddress, ex);
76 | }
77 |
78 | return recognizedEntities;
79 | }
80 |
81 | @Override
82 | public final List> tokenize(final String text) {
83 | final HttpClient httpClient = HttpClientBuilder.create().build();
84 | final List> document = new ArrayList<>();
85 |
86 | try {
87 | final HttpPost post = new HttpPost(this.tokenizeAddress);
88 | final String inputJson = "{\"text\": \"" + text + "\"}";
89 | final HttpEntity requestEntity = new StringEntity(inputJson, ContentType.APPLICATION_JSON);
90 |
91 | post.setEntity(requestEntity);
92 |
93 | final HttpResponse response = httpClient.execute(post);
94 |
95 | if (null != response.getEntity().getContent()) {
96 | final String answer = IOUtils.toString(response.getEntity().getContent(),
97 | StandardCharsets.UTF_8);
98 | final JSONObject json = new JSONObject(answer);
99 | final JSONArray sentences = json.getJSONArray("sentences");
100 |
101 |
102 | for (int i = 0; i < sentences.length(); i++) {
103 | final JSONObject sentence = (JSONObject) sentences.get(i);
104 | final JSONArray apiTokens = sentence.getJSONArray("tokens");
105 | final List tokens = new ArrayList<>();
106 |
107 | for (int j = 0; j < apiTokens.length(); j++) {
108 | final JSONObject token = (JSONObject) apiTokens.get(j);
109 |
110 | tokens.add(Token.builder()
111 | .value(token.getString("value"))
112 | .begin(token.getInt("begin"))
113 | .end(token.getInt("end"))
114 | .build());
115 | }
116 |
117 | document.add(tokens);
118 | }
119 | }
120 | } catch (final IOException ex) {
121 | JSONAPIAnnotatorRepository.logger.error("Issue to connect to {}", this.tokenizeAddress , ex);
122 | }
123 |
124 | return document;
125 | }
126 | }
127 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/implementation/repositories/annotator/stanfordcorenlp/StanfordCoreNLPAnnotatorRepository.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.implementation.repositories.annotator.stanfordcorenlp;
2 |
3 | import edu.stanford.nlp.ling.CoreLabel;
4 | import edu.stanford.nlp.pipeline.CoreSentence;
5 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
6 | import fr.eurecom.adel.commons.datatypes.Entity;
7 | import fr.eurecom.adel.commons.datatypes.Token;
8 | import fr.eurecom.adel.recognition.domain.repositories.AnnotatorRepository;
9 |
10 | import java.util.ArrayList;
11 | import java.util.List;
12 |
13 | import edu.stanford.nlp.pipeline.CoreDocument;
14 | import edu.stanford.nlp.pipeline.CoreEntityMention;
15 | import edu.stanford.nlp.pipeline.StanfordCoreNLP;
16 | import fr.eurecom.adel.commons.validators.Name;
17 |
18 | /**
19 | * @author Julien Plu on 17/11/2018.
20 | */
21 | @Name(name = "StanfordCoreNLP")
22 | public class StanfordCoreNLPAnnotatorRepository implements AnnotatorRepository {
23 | private final StanfordCoreNLP pipeline;
24 |
25 | public StanfordCoreNLPAnnotatorRepository(final String path) {
26 | this.pipeline = new StanfordCoreNLP(path.replace("classpath:", ""));
27 | }
28 |
29 | @Override
30 | public final List annotate(final AnnotatorConfig config, final String text) {
31 | final CoreDocument doc = new CoreDocument(text);
32 |
33 | this.pipeline.annotate(doc);
34 |
35 | final List entityMentions = doc.entityMentions();
36 | final List entities = new ArrayList<>();
37 |
38 | if (null != entityMentions) {
39 | for (final CoreEntityMention entityMention : entityMentions) {
40 | //TODO: add score with entityTypeConfidences
41 | if (null == config.getTags() || config.getTags().contains(entityMention.entityType())) {
42 | entities.add(
43 | Entity.builder()
44 | .phrase(entityMention.text())
45 | .cleanPhrase(entityMention.canonicalEntityMention().orElse(entityMention).text())
46 | .type(entityMention.entityType())
47 | .startOffset(entityMention.charOffsets().first)
48 | .endOffset(entityMention.charOffsets().second)
49 | .build());
50 | }
51 | }
52 | }
53 |
54 | return entities;
55 | }
56 |
57 | @Override
58 | public final List> tokenize(final String text) {
59 | final CoreDocument doc = new CoreDocument(text);
60 |
61 | this.pipeline.annotate(doc);
62 |
63 | final List sentences = doc.sentences();
64 | final List> document = new ArrayList<>();
65 |
66 | for (final CoreSentence sentence : sentences) {
67 | final List tokens = new ArrayList<>();
68 |
69 | for (final CoreLabel label : sentence.tokens()) {
70 | tokens.add(
71 | Token.builder()
72 | .value(label.value())
73 | .begin(label.beginPosition())
74 | .end(label.endPosition())
75 | .build());
76 | }
77 |
78 | document.add(tokens);
79 | }
80 |
81 | return document;
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/implementation/repositories/hashtagsegmentation/dictionarybased/DictionaryBasedHashtagSegmentation.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.implementation.repositories.hashtagsegmentation.dictionarybased;
2 |
3 | import org.apache.commons.lang3.tuple.ImmutablePair;
4 |
5 | import java.io.BufferedReader;
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.io.InputStreamReader;
9 | import java.nio.charset.StandardCharsets;
10 | import java.util.ArrayList;
11 | import java.util.Collections;
12 | import java.util.HashMap;
13 | import java.util.List;
14 | import java.util.Locale;
15 | import java.util.Map;
16 |
17 | import org.apache.commons.lang3.tuple.Pair;
18 | import org.slf4j.Logger;
19 | import org.slf4j.LoggerFactory;
20 | import org.springframework.core.io.Resource;
21 | import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
22 | import org.springframework.core.io.support.ResourcePatternResolver;
23 |
24 | import java.util.Collection;
25 | import java.util.Objects;
26 | import java.util.stream.Collectors;
27 |
28 | import fr.eurecom.adel.recognition.domain.repositories.HashtagSegmentationRepository;
29 | import fr.eurecom.adel.commons.validators.Name;
30 |
31 | /**
32 | * @author Julien Plu on 2018-12-09.
33 | */
34 | @Name(name = "Dictionary")
35 | public class DictionaryBasedHashtagSegmentation implements HashtagSegmentationRepository {
36 | private static final Logger logger = LoggerFactory.getLogger(DictionaryBasedHashtagSegmentation.class);
37 | private final Map unigrams;
38 | private final Map bigrams;
39 | // Total number of words in the Google Billion words corpus
40 | private static final Double TOTAL = 1024908267229.0;
41 |
42 | public DictionaryBasedHashtagSegmentation() {
43 | this.unigrams = new HashMap<>();
44 | this.bigrams = new HashMap<>();
45 |
46 | this.loadAllDictionaries();
47 | }
48 |
49 | @Override
50 | public final String segment(final String hashtag) {
51 | final StringBuilder wellFormed = new StringBuilder(String.join(" ", this.isegment(hashtag)));
52 | int count = 0;
53 |
54 | for (int i = 0;i < wellFormed.length();i++) {
55 | if (!" ".equals(Character.toString(wellFormed.charAt(i)))) {
56 | if (Character.isUpperCase(hashtag.charAt(count))) {
57 | wellFormed.setCharAt(i, hashtag.charAt(count));
58 | }
59 |
60 | count++;
61 | }
62 | }
63 |
64 | return wellFormed.toString();
65 | }
66 |
67 | private Iterable isegment(final String hashtag) {
68 | final String lowerCase = hashtag.toLowerCase(Locale.ENGLISH);
69 | int size = lowerCase.length();
70 |
71 | if (250 < lowerCase.length()) {
72 | size = 250;
73 | }
74 |
75 | String prefix = "";
76 |
77 | for (int offset = 0;offset < lowerCase.length();offset += size) {
78 | final String chunk = lowerCase.substring(offset, offset + size);
79 |
80 | final List chunkWords = new ArrayList<>(this.search(prefix + chunk, "").getRight());
81 |
82 | if (5 > chunkWords.size()) {
83 | prefix = String.join("", chunkWords);
84 | } else {
85 | prefix = String.join("", chunkWords.subList(chunkWords.size() - 5, chunkWords.size()));
86 | chunkWords.subList(0, 5).clear();
87 | }
88 | }
89 |
90 | return this.search(prefix, "").getRight();
91 | }
92 |
93 | private void loadAllDictionaries() {
94 | try {
95 | final ClassLoader cl = this.getClass().getClassLoader();
96 | final ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(cl);
97 | final Resource[] resources = resolver.getResources("classpath:dictionaries/**/*.tsv");
98 |
99 | for (final Resource resource: resources) {
100 | this.readDictionary(this.readResource(resource.getInputStream()), Objects.requireNonNull(resource.getFilename()));
101 | }
102 | } catch (final IOException ex) {
103 | DictionaryBasedHashtagSegmentation.logger.error("", ex);
104 | }
105 | }
106 |
107 | private List readResource(final InputStream stream) {
108 | List lines = new ArrayList<>();
109 |
110 | try (final BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
111 | lines = br.lines().parallel().collect(Collectors.toList());
112 | } catch (final IOException ex) {
113 | DictionaryBasedHashtagSegmentation.logger.error("", ex);
114 | }
115 |
116 | return lines;
117 | }
118 |
119 | private void readDictionary(final Iterable lines, final String name) {
120 | if (name.contains("unigrams")) {
121 | for (final String line : lines) {
122 | this.unigrams.put(line.split("\t")[0], Double.valueOf(line.split("\t")[1]));
123 | }
124 | } else {
125 | for (final String line : lines) {
126 | this.bigrams.put(line.split("\t")[0], Double.valueOf(line.split("\t")[1]));
127 | }
128 | }
129 | }
130 |
131 | private Double score(final String word, final String prev) {
132 | if (prev.isEmpty()) {
133 | if (this.unigrams.containsKey(word)) {
134 | return this.unigrams.get(word) / DictionaryBasedHashtagSegmentation.TOTAL;
135 | } else {
136 | return 10.0 / (DictionaryBasedHashtagSegmentation.TOTAL * StrictMath.pow(10.0, Double.parseDouble(Integer.toString(word.length()))));
137 | }
138 | } else {
139 | final String bigram = prev + ' ' + word;
140 |
141 | if (this.bigrams.containsKey(bigram) && this.unigrams.containsKey(prev)) {
142 | return this.bigrams.get(bigram) / DictionaryBasedHashtagSegmentation.TOTAL / this.score(prev, "");
143 | } else {
144 | return this.score(word, "");
145 | }
146 | }
147 | }
148 |
149 | private Pair> search(final String text, final String prev) {
150 | if (text.isEmpty()) {
151 | return ImmutablePair.of(0.0, Collections.emptyList());
152 | }
153 |
154 | final List>> candidates = this.candidates(text, prev);
155 | int maxIndex = 0;
156 | double value = candidates.get(0).getLeft();
157 |
158 | for (int i = 1;i < candidates.size();i++) {
159 | if (candidates.get(i).getLeft() > value) {
160 | value = candidates.get(i).getLeft();
161 | maxIndex = i;
162 | }
163 | }
164 |
165 | return candidates.get(maxIndex);
166 | }
167 |
168 | private List>> candidates(final String text, final String prev) {
169 | final List>> candidates = new ArrayList<>();
170 | final Map, Pair>> memo = new HashMap<>();
171 |
172 | for (final Pair pair : this.divide(text)) {
173 | final double prefixScore = StrictMath.log10(this.score(pair.getLeft(), prev));
174 |
175 | if (!memo.containsKey(pair)) {
176 | memo.put(pair, this.search(pair.getRight(), pair.getLeft()));
177 | }
178 |
179 | final List segments = new ArrayList<>();
180 |
181 | segments.add(pair.getLeft());
182 | segments.addAll(memo.get(pair).getRight());
183 |
184 | candidates.add(ImmutablePair.of(prefixScore + memo.get(pair).getLeft(), segments));
185 | }
186 |
187 | return candidates;
188 | }
189 |
190 | private Iterable> divide(final String text) {
191 | final Collection> segments = new ArrayList<>();
192 |
193 | for (int pos = 1;pos < Math.min(text.length(), 24) + 1;pos++) {
194 | segments.add(ImmutablePair.of(text.substring(0, pos), text.substring(pos)));
195 | }
196 |
197 | return segments;
198 | }
199 | }
200 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/implementation/repositories/mentionoverlapresolution/merge/MergeMentionOverlapResolution.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.implementation.repositories.mentionoverlapresolution.merge;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collection;
5 | import java.util.List;
6 | import java.util.Locale;
7 | import java.util.Map;
8 |
9 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
10 | import fr.eurecom.adel.commons.datatypes.Entity;
11 | import fr.eurecom.adel.recognition.domain.repositories.MentionOverlapResolutionRepository;
12 | import fr.eurecom.adel.commons.validators.Name;
13 |
14 | /**
15 | * @author Julien Plu on 2018-12-17.
16 | */
17 | @Name(name = "Merge")
18 | public class MergeMentionOverlapResolution implements MentionOverlapResolutionRepository {
19 | @Override
20 | public final List resolveMentionOverlapping(
21 | final Map> documents) {
22 | final List finalEntities = new ArrayList<>();
23 |
24 | for (final Map.Entry> document : documents.entrySet()) {
25 | for (final Entity old : document.getValue()) {
26 | final List overlapIndexes = this.overlappingIndex(finalEntities, old);
27 |
28 | if (overlapIndexes.isEmpty()) {
29 | if (!old.getType().isEmpty()) {
30 | old.setType(
31 | old.getType()
32 | + "from"
33 | + document.getKey().getFrom()
34 | + "--"
35 | + document.getKey().getName());
36 | }
37 |
38 | finalEntities.add(old);
39 | } else {
40 | final Collection newEntities = new ArrayList<>();
41 | Entity newEntity = this.entityResolution(finalEntities.get(overlapIndexes.get(0)), old,
42 | document.getKey());
43 |
44 | newEntities.add(newEntity);
45 |
46 | for (int i = 1;i < overlapIndexes.size();i++) {
47 | newEntity = this.entityResolution(finalEntities.get(overlapIndexes.get(i)), old,
48 | document.getKey());
49 | newEntities.add(newEntity);
50 | }
51 |
52 | if (1 < newEntities.size()) {
53 | final StringBuilder newType = new StringBuilder();
54 |
55 | for (final Entity entity : newEntities) {
56 | final String[] splitTypes = entity.getType().split("\\|\\|");
57 |
58 | for (final String splitType : splitTypes) {
59 | if (!newType.toString().contains(splitType)) {
60 | newType.append(splitType);
61 | newType.append("||");
62 | }
63 | }
64 | }
65 |
66 | newEntity.setType(newType.substring(0, newType.length() - 2));
67 | }
68 |
69 | final Collection entitiesToRemove = new ArrayList<>();
70 |
71 | for (final Integer overlapIndexe : overlapIndexes) {
72 | entitiesToRemove.add(finalEntities.get(overlapIndexe));
73 | }
74 |
75 | finalEntities.removeAll(entitiesToRemove);
76 | finalEntities.add(newEntity);
77 | }
78 | }
79 | }
80 |
81 | return finalEntities;
82 | }
83 |
84 | private List overlappingIndex(final List finalEntities, final Entity old) {
85 | final List indexes = new ArrayList<>();
86 |
87 | for (final Entity entity : finalEntities) {
88 | if (this.isOverlap(entity, old)) {
89 | indexes.add(finalEntities.indexOf(entity));
90 | }
91 | }
92 |
93 | return indexes;
94 | }
95 |
96 | private boolean isOverlap(final Entity e1, final Entity e2) {
97 | boolean res = (e1.getStartOffset() > e2.getStartOffset()) &&
98 | (e1.getStartOffset() < e2.getEndOffset());
99 |
100 | if ((e1.getStartOffset().intValue() == e2.getStartOffset().intValue())
101 | || (e1.getEndOffset().intValue() == e2.getEndOffset().intValue())) {
102 | res = true;
103 | }
104 |
105 | if ((e2.getStartOffset() > e1.getStartOffset()) && (e2.getStartOffset() < e1.getEndOffset())) {
106 | res = true;
107 | }
108 |
109 | return res;
110 | }
111 |
112 | private Entity entityResolution(final Entity e1, final Entity e2, final AnnotatorConfig config) {
113 | final Entity finalEntity = Entity.builder().build();
114 |
115 | if (e1.getStartOffset() <= e2.getStartOffset()) {
116 | finalEntity.setStartOffset(e1.getStartOffset());
117 | } else {
118 | finalEntity.setStartOffset(e2.getStartOffset());
119 | }
120 |
121 | if (e1.getEndOffset() >= e2.getEndOffset()) {
122 | finalEntity.setEndOffset(e1.getEndOffset());
123 | } else {
124 | finalEntity.setEndOffset(e2.getEndOffset());
125 | }
126 |
127 | if (e1.getStartOffset() < e2.getStartOffset()) {
128 | finalEntity.setPhrase(this.concat(e1.getPhrase(), e2.getPhrase()));
129 | finalEntity.setCleanPhrase(this.concat(e1.getPhrase(), e2.getPhrase()));
130 | } else {
131 | finalEntity.setPhrase(this.concat(e2.getPhrase(), e1.getPhrase()));
132 | finalEntity.setCleanPhrase(this.concat(e2.getCleanPhrase(), e1.getCleanPhrase()));
133 | }
134 |
135 | if (e1.getType().isEmpty() && e2.getType().isEmpty()) {
136 | finalEntity.setType("");
137 | } else if (e2.getType().isEmpty() && !e1.getType().isEmpty()) {
138 | finalEntity.setType(e1.getType());
139 | } else if (!e2.getType().isEmpty() && e1.getType().isEmpty()) {
140 | finalEntity.setType(e2.getType() + "from" + config.getFrom() + "--" + config.getName());
141 | } else {
142 | finalEntity.setType(e1.getType() + "||" + e2.getType() + "from" + config.getFrom() + "--" + config.getName());
143 | }
144 |
145 | return finalEntity;
146 | }
147 |
148 | private String concat(final String s1, final String s2) {
149 | if (s2.toLowerCase(Locale.getDefault()).contains(s1.toLowerCase(Locale.getDefault()))) {
150 | return s2;
151 | }
152 |
153 | if (s1.toLowerCase(Locale.getDefault()).contains(s2.toLowerCase(Locale.getDefault()))) {
154 | return s1;
155 | }
156 |
157 | final int len = Math.min(s1.length(), s2.length());
158 | int index = -1;
159 |
160 | for (int i = len; 0 < i; i--) {
161 | final String substring = s2.substring(0, i);
162 |
163 | if (s1.toLowerCase(Locale.getDefault()).endsWith(substring.toLowerCase(Locale.getDefault()))) {
164 | index = i;
165 | break;
166 | }
167 | }
168 |
169 | final StringBuilder sb = new StringBuilder(s1);
170 |
171 | if (index <= s2.length()) {
172 | sb.append(s2.substring(index));
173 | }
174 |
175 | return sb.toString();
176 | }
177 | }
178 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/implementation/repositories/typeoverlapresolution/majorityvoting/MajorityVotingTypeOverlapResolution.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.implementation.repositories.typeoverlapresolution.majorityvoting;
2 |
3 | import com.google.common.io.Files;
4 |
5 | import org.slf4j.Logger;
6 | import org.slf4j.LoggerFactory;
7 | import org.springframework.core.io.Resource;
8 | import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
9 | import org.springframework.core.io.support.ResourcePatternResolver;
10 |
11 | import java.io.BufferedReader;
12 | import java.io.IOException;
13 | import java.io.InputStream;
14 | import java.io.InputStreamReader;
15 | import java.nio.charset.StandardCharsets;
16 | import java.util.ArrayList;
17 | import java.util.Arrays;
18 | import java.util.Collections;
19 | import java.util.Comparator;
20 | import java.util.HashMap;
21 | import java.util.List;
22 | import java.util.Map;
23 | import java.util.Objects;
24 | import java.util.stream.Collectors;
25 |
26 | import fr.eurecom.adel.recognition.configuration.TypeOverlappingConfig;
27 | import fr.eurecom.adel.commons.datatypes.Entity;
28 | import fr.eurecom.adel.recognition.domain.repositories.TypeOverlapResolutionRepository;
29 | import fr.eurecom.adel.commons.validators.Name;
30 | import fr.eurecom.adel.recognition.exceptions.MappingNotExistsException;
31 | import fr.eurecom.adel.recognition.exceptions.TypeNotExistsException;
32 |
33 | /**
34 | * @author Julien Plu on 2018-11-26.
35 | */
36 | @Name(name = "MajorityVoting")
37 | public class MajorityVotingTypeOverlapResolution implements TypeOverlapResolutionRepository {
38 | private static final Logger logger = LoggerFactory.getLogger(MajorityVotingTypeOverlapResolution.class);
39 | private final Map> typesMapping;
40 |
41 | public MajorityVotingTypeOverlapResolution() {
42 | this.typesMapping = new HashMap<>();
43 | this.init();
44 | }
45 |
46 | private void init() {
47 | try {
48 | final ClassLoader cl = this.getClass().getClassLoader();
49 | final ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(cl);
50 | final Resource[] resources = resolver.getResources("classpath:mappings/**/*.map");
51 |
52 | for (final Resource resource: resources) {
53 | final List lines = this.readResource(resource.getInputStream());
54 | final Map mapping = new HashMap<>();
55 |
56 | for (final String line : lines) {
57 | final String left = line.split("\t")[0];
58 | final String right = line.split("\t")[1];
59 |
60 | for (final String type : left.split(",")) {
61 | mapping.put(type, right);
62 | }
63 | }
64 |
65 | this.typesMapping.put(Files.getNameWithoutExtension(Objects.requireNonNull(resource.getFilename())), mapping);
66 | }
67 | } catch (final IOException ex) {
68 | MajorityVotingTypeOverlapResolution.logger.error("Issue to read the mapping files", ex);
69 | }
70 | }
71 |
72 | private List readResource(final InputStream stream) {
73 | List lines = new ArrayList<>();
74 |
75 | try (final BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
76 | lines = br.lines().parallel().collect(Collectors.toList());
77 | } catch (final IOException ex) {
78 | MajorityVotingTypeOverlapResolution.logger.error("Issue to transform the stream of a dictionary into a list", ex);
79 | }
80 |
81 | return lines;
82 | }
83 |
84 | @Override
85 | public final void resolveTypeOverlapping(final TypeOverlappingConfig config, final List entities) throws MappingNotExistsException, TypeNotExistsException {
86 | for (final Entity entity : entities) {
87 | StringBuilder newType = new StringBuilder();
88 |
89 | for (final String singleType : entity.getType().split("\\|\\|")) {
90 | final String[] splittedType = singleType.split("from");
91 |
92 | if (newType.toString().isEmpty()) {
93 | newType = new StringBuilder(this.resolveTypeMapping(splittedType[1].split("--")[0], config.getTo(), splittedType[0]));
94 | } else {
95 | newType.append("||");
96 | newType.append(this.resolveTypeMapping(splittedType[1].split("--")[0], config.getTo(), splittedType[0]));
97 | }
98 |
99 | newType.append("--");
100 | newType.append(config.getPriority().indexOf(splittedType[1].split("--")[1]));
101 | }
102 |
103 | entity.setType(newType.toString());
104 | }
105 |
106 | this.majorityVote(entities);
107 | }
108 |
109 | private void majorityVote(final Iterable newEntities) {
110 | for (final Entity entity : newEntities) {
111 | final Map map = Arrays.stream(entity.getType().split("\\|\\|")).collect(Collectors.groupingBy(s -> s.split("--")[0], Collectors.counting()));
112 |
113 | if (Collections.max(map.values()).equals(Collections.min(map.values()))) {
114 | final List types = Arrays.asList(entity.getType().split("\\|\\|"));
115 |
116 | types.sort(Comparator.comparing(newS -> Integer.valueOf(newS.split("--")[1])));
117 |
118 | entity.setType(types.get(0).split("--")[0]);
119 | } else {
120 | entity.setType(map.entrySet().stream().max((entry1, entry2) -> entry1.getValue() > entry2.getValue() ? 1 : -1).get().getKey());
121 | }
122 | }
123 | }
124 |
125 | private String resolveTypeMapping(final String from, final String to, final String type) throws MappingNotExistsException, TypeNotExistsException {
126 | final String newType;
127 |
128 | if (from.equals(to)) {
129 | newType = type;
130 | } else {
131 | if (!this.typesMapping.containsKey(from + '2' + to)) {
132 | throw new MappingNotExistsException("The mapping file " + from + '2' + to + " does not exists");
133 | }
134 |
135 | if (!this.typesMapping.get(from + '2' + to).containsKey(type)) {
136 | throw new TypeNotExistsException("The type " + type + " does not exists in the vocabulary " + from);
137 | }
138 |
139 | newType = this.typesMapping.get(from + '2' + to).get(type);
140 | }
141 |
142 | return newType;
143 | }
144 | }
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/implementation/repositories/usermentiondereferencing/httpquery/HTTPQueryUserMentionDereferencing.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.implementation.repositories.usermentiondereferencing.httpquery;
2 |
3 | import java.io.IOException;
4 |
5 | import org.jsoup.Jsoup;
6 | import org.jsoup.nodes.Document;
7 | import org.jsoup.nodes.Element;
8 | import org.slf4j.Logger;
9 | import org.slf4j.LoggerFactory;
10 |
11 | import fr.eurecom.adel.recognition.domain.repositories.UserMentionDereferencingRepository;
12 | import fr.eurecom.adel.commons.validators.Name;
13 |
14 | /**
15 | * @author Julien Plu on 2018-12-09.
16 | */
17 | @Name(name = "HTTPQuery")
18 | public class HTTPQueryUserMentionDereferencing implements UserMentionDereferencingRepository {
19 | private static final Logger logger = LoggerFactory.getLogger(HTTPQueryUserMentionDereferencing.class);
20 |
21 | @Override
22 | public final String dereference(final String userMention) {
23 | String userName = "";
24 |
25 | try {
26 | final Document doc = Jsoup.connect("https://twitter.com/" + userMention).ignoreHttpErrors(true).get();
27 | final Element title = doc.select("title").first();
28 |
29 | if ("Twitter / ?".equals(title.text())) {
30 | userName = userMention;
31 | } else {
32 | userName = title.text().split(" \\(")[0];
33 | }
34 | } catch (final IOException ex) {
35 | HTTPQueryUserMentionDereferencing.logger.error("", ex);
36 | }
37 |
38 | return userName;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/usecases/Annotator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.usecases;
2 |
3 | import java.util.List;
4 |
5 | import fr.eurecom.adel.commons.datatypes.Entity;
6 | import fr.eurecom.adel.commons.datatypes.Token;
7 | import fr.eurecom.adel.recognition.domain.repositories.AnnotatorRepository;
8 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
9 |
10 | /**
11 | * @author Julien Plu on 17/11/2018.
12 | */
13 | public class Annotator {
14 | private final AnnotatorRepository annotatorRepository;
15 | private final AnnotatorConfig config;
16 |
17 | public Annotator(final AnnotatorRepository newAnnotatorRepository, final AnnotatorConfig newConfig) {
18 | this.annotatorRepository = newAnnotatorRepository;
19 | this.config = newConfig;
20 | }
21 |
22 | final List annotate(final String text) {
23 | return this.annotatorRepository.annotate(this.config, text);
24 | }
25 |
26 | final List> tokenize(final String text) {
27 | return this.annotatorRepository.tokenize(text);
28 | }
29 |
30 | public final AnnotatorConfig getConfig() {
31 | return this.config;
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/usecases/OverlapResolution.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.usecases;
2 |
3 | import java.util.List;
4 | import java.util.Map;
5 |
6 | import fr.eurecom.adel.commons.datatypes.Entity;
7 | import fr.eurecom.adel.recognition.domain.repositories.MentionOverlapResolutionRepository;
8 | import fr.eurecom.adel.recognition.domain.repositories.TypeOverlapResolutionRepository;
9 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
10 | import fr.eurecom.adel.recognition.configuration.TypeOverlappingConfig;
11 | import fr.eurecom.adel.recognition.exceptions.MappingNotExistsException;
12 | import fr.eurecom.adel.recognition.exceptions.TypeNotExistsException;
13 |
14 | /**
15 | * @author Julien Plu on 2018-11-26.
16 | */
17 | public class OverlapResolution {
18 | private final MentionOverlapResolutionRepository mentionOverlapResolutionRepository;
19 | private final TypeOverlapResolutionRepository typeOverlapResolutionRepository;
20 | private final TypeOverlappingConfig config;
21 |
22 | public OverlapResolution(final TypeOverlapResolutionRepository newTypeOverlapResolutionRepository, final TypeOverlappingConfig newConfig, final MentionOverlapResolutionRepository newMentionOverlapResolutionRepository) {
23 | this.mentionOverlapResolutionRepository = newMentionOverlapResolutionRepository;
24 | this.typeOverlapResolutionRepository = newTypeOverlapResolutionRepository;
25 | this.config = newConfig;
26 | }
27 |
28 | final List resolveOverlap(final Map> documents) throws MappingNotExistsException, TypeNotExistsException {
29 | final List newEntities = this.mentionOverlapResolutionRepository.resolveMentionOverlapping(documents);
30 |
31 | this.typeOverlapResolutionRepository.resolveTypeOverlapping(this.config, newEntities);
32 |
33 | return newEntities;
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/usecases/RecognitionPipeline.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.usecases;
2 |
3 | import org.apache.commons.lang3.StringUtils;
4 | import org.apache.commons.lang3.tuple.Pair;
5 |
6 | import java.util.ArrayList;
7 | import java.util.Comparator;
8 | import java.util.HashMap;
9 | import java.util.List;
10 | import java.util.Map;
11 |
12 | import fr.eurecom.adel.recognition.configuration.RecognitionConfig;
13 | import fr.eurecom.adel.commons.datatypes.Document;
14 | import fr.eurecom.adel.commons.datatypes.Entity;
15 | import fr.eurecom.adel.commons.datatypes.Token;
16 | import fr.eurecom.adel.commons.datatypes.TweetEntity;
17 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
18 | import fr.eurecom.adel.recognition.exceptions.MappingNotExistsException;
19 | import fr.eurecom.adel.recognition.exceptions.TypeNotExistsException;
20 |
21 | /**
22 | * @author Julien Plu on 2018-12-09.
23 | */
24 | public class RecognitionPipeline {
25 | private final List annotators;
26 | private final OverlapResolution overlapResolution;
27 | private final TweetNormalization tweetNormalization;
28 | private final int indexAnnotatorAsTokenizer;
29 | private final RecognitionConfig config;
30 |
31 | public RecognitionPipeline(final List newAnnotators, final OverlapResolution newOverlapResolution, final TweetNormalization newTweetNormalization, final int newIndexAnnotatorAsTokenizer, final RecognitionConfig newConfig) {
32 | this.annotators = new ArrayList<>(newAnnotators);
33 | this.overlapResolution = newOverlapResolution;
34 | this.tweetNormalization = newTweetNormalization;
35 | this.indexAnnotatorAsTokenizer = newIndexAnnotatorAsTokenizer;
36 | this.config = newConfig;
37 | }
38 |
39 | public final RecognitionConfig getConfig() {
40 | return this.config;
41 | }
42 |
43 | public final Map run(final String text) throws MappingNotExistsException, TypeNotExistsException {
44 | Pair> normalizeTweet = Pair.of(text, new ArrayList<>());
45 |
46 | if (null != this.tweetNormalization) {
47 | normalizeTweet = this.tweetNormalization.normalize(text);
48 | }
49 |
50 | final Map> documents = new HashMap<>();
51 |
52 | for (final Annotator annotator : this.annotators) {
53 | final List tmpEntities = annotator.annotate(normalizeTweet.getLeft());
54 |
55 | documents.put(annotator.getConfig(), tmpEntities);
56 | }
57 |
58 | final List noOverlapEntities = this.overlapResolution.resolveOverlap(documents);
59 | final List> tokens = this.annotators.get(this.indexAnnotatorAsTokenizer).tokenize(text);
60 | Document adelDocument = Document.builder().text(text).entities(noOverlapEntities).tokens(tokens).build();
61 |
62 | if (this.config.getTweetnormalization().getActivate()) {
63 | adelDocument = Document.builder().text(text).entities(this.alignment(noOverlapEntities, normalizeTweet.getRight())).tokens(tokens).build();
64 | }
65 |
66 | final Map allDocuments = new HashMap<>(Map.of("adel", adelDocument));
67 |
68 | if (1 == this.annotators.size()) {
69 | return allDocuments;
70 | }
71 |
72 | for (final Map.Entry> document : documents.entrySet()) {
73 | allDocuments.put(document.getKey().getName(), Document.builder().text(text).entities(document.getValue()).tokens(tokens).build());
74 | }
75 |
76 | return allDocuments;
77 | }
78 |
79 | private List alignment(final List entities, final List tweetEntities) {
80 | if (tweetEntities.isEmpty()) {
81 | return entities;
82 | }
83 |
84 | entities.sort(Comparator.comparing(Entity::getStartOffset));
85 | tweetEntities.sort(Comparator.comparing(TweetEntity::getStartOffset));
86 | int offset = 0;
87 |
88 | for (final TweetEntity tweetEntity : tweetEntities) {
89 | offset += StringUtils.countMatches(tweetEntity.getCleanPhrase(), " ");
90 | offset -= 1;
91 |
92 | this.overlapIndex(entities, tweetEntity, offset);
93 | }
94 |
95 | offset = 0;
96 |
97 | for (final Entity entity : entities) {
98 | if (entity.getPhrase().startsWith("#") || entity.getPhrase().startsWith("@")) {
99 | for (final TweetEntity tweetEntity : tweetEntities) {
100 | if (entity.getPhrase().equals(tweetEntity.getPhrase())) {
101 | offset += StringUtils.countMatches(tweetEntity.getCleanPhrase(), " ");
102 | offset -= 1;
103 | break;
104 | }
105 | }
106 | } else {
107 | entity.setStartOffset(entity.getStartOffset() - offset);
108 | entity.setEndOffset(entity.getEndOffset() - offset);
109 | }
110 | }
111 |
112 | return entities;
113 | }
114 |
115 | private void overlapIndex(final List entities, final TweetEntity tweetEntity, final int offset) {
116 | int index = -1;
117 |
118 | for (final Entity entity : entities) {
119 | if (tweetEntity.getStartOffset().equals(entity.getStartOffset()) ||
120 | tweetEntity.getStartOffset() + offset == entity.getStartOffset() ||
121 | tweetEntity.getEndOffset().equals(entity.getEndOffset()) ||
122 | tweetEntity.getEndOffset() + offset == entity.getEndOffset() ||
123 | tweetEntity.getStartOffset() <= entity.getStartOffset() && tweetEntity.getEndOffset() >= entity.getEndOffset()){
124 | index = entities.indexOf(entity);
125 | break;
126 | }
127 | }
128 |
129 | if (-1 != index) {
130 | entities.get(index).setStartOffset(tweetEntity.getStartOffset());
131 | entities.get(index).setEndOffset(tweetEntity.getEndOffset());
132 | entities.get(index).setPhrase(tweetEntity.getPhrase());
133 | }
134 | }
135 | }
136 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/usecases/TweetNormalization.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.usecases;
2 |
3 | import com.vdurmont.emoji.EmojiParser;
4 |
5 | import org.apache.commons.lang3.tuple.Pair;
6 |
7 | import java.util.List;
8 | import java.util.stream.Collectors;
9 | import java.util.stream.Stream;
10 |
11 | import fr.eurecom.adel.commons.datatypes.TweetEntity;
12 | import fr.eurecom.adel.commons.utils.TweetUtils;
13 | import fr.eurecom.adel.recognition.domain.repositories.HashtagSegmentationRepository;
14 | import fr.eurecom.adel.recognition.domain.repositories.UserMentionDereferencingRepository;
15 |
16 | /**
17 | * @author Julien Plu on 2018-12-09.
18 | */
19 | public class TweetNormalization {
20 | private final HashtagSegmentationRepository hashtagSegmentationRepository;
21 | private final UserMentionDereferencingRepository userMentionDereferencingRepository;
22 |
23 | public TweetNormalization(final HashtagSegmentationRepository newHashtagSegmentationRepository, final UserMentionDereferencingRepository newUserMentionDereferencing) {
24 | this.hashtagSegmentationRepository = newHashtagSegmentationRepository;
25 | this.userMentionDereferencingRepository = newUserMentionDereferencing;
26 | }
27 |
28 | final Pair> normalize(final String tweet) {
29 | String normalizedTweet = TweetUtils.removeEmojis(tweet);
30 | final List hashtags = TweetUtils.getHashtags(normalizedTweet);
31 | final List userMentions = TweetUtils.getUserMentions(normalizedTweet);
32 |
33 | for (final TweetEntity hashtag : hashtags) {
34 | hashtag.setCleanPhrase(this.hashtagSegmentationRepository.segment(hashtag.getPhrase().replace("#", "")));
35 |
36 | normalizedTweet = normalizedTweet.replace(hashtag.getPhrase(), hashtag.getCleanPhrase());
37 | }
38 |
39 | for (final TweetEntity userMention : userMentions) {
40 | userMention.setCleanPhrase(this.userMentionDereferencingRepository.dereference(userMention.getPhrase().replace("@", "")));
41 |
42 | normalizedTweet = normalizedTweet.replace(userMention.getPhrase(), userMention.getCleanPhrase());
43 | }
44 |
45 | return Pair.of(normalizedTweet, Stream.concat(hashtags.stream(), userMentions.stream()).collect(Collectors.toList()));
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/ContentPriorityList.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 | /**
13 | * @author Julien Plu on 2018-12-06.
14 | */
15 | @Target(ElementType.TYPE)
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Constraint(validatedBy = ContentPriorityListValidator.class)
18 | @Documented
19 | public @interface ContentPriorityList {
20 | String message() default "{propertylist.content}";
21 | Class>[] groups() default {};
22 | Class extends Payload>[] payload() default {};
23 | }
24 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/ContentPriorityListValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import javax.validation.ConstraintValidator;
4 | import javax.validation.ConstraintValidatorContext;
5 |
6 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
7 | import fr.eurecom.adel.recognition.configuration.RecognitionConfig;
8 |
9 | /**
10 | * @author Julien Plu on 2018-12-06.
11 | */
12 | public class ContentPriorityListValidator implements ConstraintValidator {
13 | @Override
14 | public final boolean isValid(final RecognitionConfig t, final ConstraintValidatorContext constraintValidatorContext) {
15 | for (final AnnotatorConfig config : t.getAnnotators()) {
16 | if (!t.getTypeoverlapping().getPriority().contains(config.getName())) {
17 | return false;
18 | }
19 | }
20 |
21 | return true;
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/HasTokenizer.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 | /**
13 | * @author Julien Plu on 2019-02-09.
14 | */
15 | @Target(ElementType.TYPE_USE)
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Constraint(validatedBy = HasTokenizerValidator.class)
18 | @Documented
19 | public @interface HasTokenizer {
20 | String message() default "{has.tokenizer}";
21 | Class>[] groups() default {};
22 | Class extends Payload>[] payload() default {};
23 | }
24 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/HasTokenizerValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import java.util.List;
4 |
5 | import javax.validation.ConstraintValidator;
6 | import javax.validation.ConstraintValidatorContext;
7 |
8 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
9 |
10 | /**
11 | * @author Julien Plu on 2019-02-09.
12 | */
13 | public class HasTokenizerValidator implements ConstraintValidator> {
14 | @Override
15 | public final boolean isValid(final List t, final ConstraintValidatorContext constraintValidatorContext) {
16 | for (final AnnotatorConfig config : t) {
17 | if (config.getTokenizer()) {
18 | return true;
19 | }
20 | }
21 |
22 | return false;
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/NameExistsForRecognition.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 | /**
13 | * @author Julien Plu on 2019-02-09.
14 | */
15 | @Target(ElementType.TYPE)
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Constraint(validatedBy = NameExistsForRecognitionValidator.class)
18 | @Documented
19 | public @interface NameExistsForRecognition {
20 | String message() default "{name.exists}";
21 | Class>[] groups() default {};
22 | Class extends Payload>[] payload() default {};
23 | }
24 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/NameExistsForRecognitionValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import javax.validation.ConstraintValidator;
4 | import javax.validation.ConstraintValidatorContext;
5 |
6 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
7 | import fr.eurecom.adel.recognition.configuration.RecognitionConfig;
8 | import fr.eurecom.adel.commons.utils.ReflectionUtils;
9 |
10 | /**
11 | * @author Julien Plu on 2019-02-09.
12 | */
13 | public class NameExistsForRecognitionValidator implements ConstraintValidator {
14 | @Override
15 | public final boolean isValid(final RecognitionConfig t, final ConstraintValidatorContext constraintValidatorContext) {
16 | for (final AnnotatorConfig annotatorConfig : t.getAnnotators()) {
17 | if (ReflectionUtils.getClassNameFromMethod(annotatorConfig.getAnnotator(), "annotator").isEmpty()) {
18 | return false;
19 | }
20 | }
21 |
22 | if (ReflectionUtils.getClassNameFromMethod(t.getTypeoverlapping().getMethod(), "typeoverlapresolution").isEmpty()) {
23 | return false;
24 | }
25 |
26 | if (ReflectionUtils.getClassNameFromMethod(t.getMentionoverlapping(), "mentionoverlapresolution").isEmpty()) {
27 | return false;
28 | }
29 |
30 | if (ReflectionUtils.getClassNameFromMethod(t.getTweetnormalization().getUsermention(), "usermentiondereferencing").isEmpty()) {
31 | return false;
32 | }
33 |
34 | return !ReflectionUtils.getClassNameFromMethod(t.getTweetnormalization().getHashtag(), "hashtagsegmentation").isEmpty();
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/SizePriorityList.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 | /**
13 | * @author Julien Plu on 2018-12-05.
14 | */
15 | @Target(ElementType.TYPE)
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Constraint(validatedBy = SizePriorityListValidator.class)
18 | @Documented
19 | public @interface SizePriorityList {
20 | String message() default "{propertylist.size}";
21 | Class>[] groups() default {};
22 | Class extends Payload>[] payload() default {};
23 | }
24 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/SizePriorityListValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import javax.validation.ConstraintValidator;
4 | import javax.validation.ConstraintValidatorContext;
5 |
6 | import fr.eurecom.adel.recognition.configuration.RecognitionConfig;
7 |
8 | /**
9 | * @author Julien Plu on 2018-12-05.
10 | */
11 | public class SizePriorityListValidator implements ConstraintValidator {
12 | @Override
13 | public final boolean isValid(final RecognitionConfig t, final ConstraintValidatorContext constraintValidatorContext) {
14 | return t.getAnnotators().size() == t.getTypeoverlapping().getPriority().size();
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/UniqueName.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | import javax.validation.Constraint;
10 | import javax.validation.Payload;
11 |
12 | /**
13 | * @author Julien Plu on 2019-02-13.
14 | */
15 | @Target(ElementType.TYPE_USE)
16 | @Retention(RetentionPolicy.RUNTIME)
17 | @Constraint(validatedBy = UniqueNameValidator.class)
18 | @Documented
19 | public @interface UniqueName {
20 | String message() default "{unique.name}";
21 | Class>[] groups() default {};
22 | Class extends Payload>[] payload() default {};
23 | }
24 |
--------------------------------------------------------------------------------
/adel-recognition/src/main/java/fr/eurecom/adel/recognition/validators/UniqueNameValidator.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.validators;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collection;
5 | import java.util.List;
6 |
7 | import javax.validation.ConstraintValidator;
8 | import javax.validation.ConstraintValidatorContext;
9 |
10 | import fr.eurecom.adel.recognition.configuration.AnnotatorConfig;
11 |
12 | /**
13 | * @author Julien Plu on 2019-02-13.
14 | */
15 | public class UniqueNameValidator implements ConstraintValidator> {
16 | @Override
17 | public final boolean isValid(final List t, final ConstraintValidatorContext constraintValidatorContext) {
18 | final Collection names = new ArrayList<>();
19 |
20 | for (final AnnotatorConfig extractor : t) {
21 | if (names.contains(extractor.getName())) {
22 | return false;
23 | } else {
24 | names.add(extractor.getName());
25 | }
26 | }
27 |
28 | return true;
29 | }
30 | }
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/CoNLL2DBpedia.map:
--------------------------------------------------------------------------------
1 | PER http://dbpedia.org/ontology/Person
2 | ORG http://dbpedia.org/ontology/Organisation
3 | LOC http://dbpedia.org/ontology/Place
4 | MISC http://www.w3.org/2002/07/owl#Thing
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/CoNLL2DUL.map:
--------------------------------------------------------------------------------
1 | PER http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person
2 | LOC http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Place
3 | ORG http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Organization
4 | MISC http://www.w3.org/2002/07/owl#Thing
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/CoNLL2MUC.map:
--------------------------------------------------------------------------------
1 | PER PERSON
2 | LOC LOCATION
3 | ORG ORGANIZATION
4 | MISC O
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/CoNLL2Musicbrainz.map:
--------------------------------------------------------------------------------
1 | PER http://purl.org/ontology/mo/MusicArtist
2 | ORG http://purl.org/ontology/mo/MusicArtist
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/CoNLL2NEEL.map:
--------------------------------------------------------------------------------
1 | MISC Thing
2 | PER Person
3 | LOC Location
4 | ORG Organization
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/DBpedia2CoNLL.map:
--------------------------------------------------------------------------------
1 | dbo_Person,http://dbpedia.org/ontology/Person PER
2 | dbo_Place,http://dbpedia.org/ontology/Place LOC
3 | dbo_Organisation,http://dbpedia.org/ontology/Organisation ORG
4 | dbo_Activity,dbo_Altitude,dbo_AnatomicalStructure,dbo_Area,dbo_Award,dbo_Biomolecule,dbo_Blazon,dbo_ChartsPlacements,dbo_ChemicalSubstance,dbo_Colour,dbo_Currency,dbo_Demographics,dbo_Depth,dbo_Device,dbo_Diploma,dbo_Disease,dbo_ElectionDiagram,dbo_EthnicGroup,dbo_Event,dbo_Flag,dbo_Food,dbo_GeneLocation,dbo_GrossDomesticProduct,dbo_GrossDomesticProductPerCapita,dbo_Holiday,dbo_HumanDevelopmentIndex,dbo_Language,dbo_List,dbo_MeanOfTransportation,dbo_Media,dbo_Medicine,dbo_Name,dbo_NaturalEvent,dbo_PenaltyShootOut,dbo_PersonFunction,dbo_Polyhedron,dbo_Population,dbo_PublicService,dbo_Relationship,dbo_RouteStop,dbo_Species,dbo_SportCompetitionResult,dbo_SportsSeason,dbo_Statistic,dbo_TimePeriod,dbo_TopicalConcept,dbo_UnitOfWork,dbo_Unknown,dbo_Work,http://dbpedia.org/ontology/Activity,http://dbpedia.org/ontology/Altitude,http://dbpedia.org/ontology/AnatomicalStructure,http://dbpedia.org/ontology/Area,http://dbpedia.org/ontology/Award,http://dbpedia.org/ontology/Biomolecule,http://dbpedia.org/ontology/Blazon,http://dbpedia.org/ontology/ChartsPlacements,http://dbpedia.org/ontology/ChemicalSubstance,http://dbpedia.org/ontology/Colour,http://dbpedia.org/ontology/Currency,http://dbpedia.org/ontology/Demographics,http://dbpedia.org/ontology/Depth,http://dbpedia.org/ontology/Device,http://dbpedia.org/ontology/Diploma,http://dbpedia.org/ontology/Disease,http://dbpedia.org/ontology/ElectionDiagram,http://dbpedia.org/ontology/EthnicGroup,http://dbpedia.org/ontology/Event,http://dbpedia.org/ontology/Flag,http://dbpedia.org/ontology/Food,http://dbpedia.org/ontology/GeneLocation,http://dbpedia.org/ontology/GrossDomesticProduct,http://dbpedia.org/ontology/GrossDomesticProductPerCapita,http://dbpedia.org/ontology/Holiday,http://dbpedia.org/ontology/HumanDevelopmentIndex,http://dbpedia.org/ontology/Language,http://dbpedia.org/ontology/List,http://dbpedia.org/ontology/MeanOfTransportation,http://dbpedia.org/ontology/Media,http://dbpedia.org/ontology/Medicine,http://dbpedia.org/ont
ology/Name,http://dbpedia.org/ontology/NaturalEvent,http://dbpedia.org/ontology/PenaltyShootOut,http://dbpedia.org/ontology/PersonFunction,http://dbpedia.org/ontology/Polyhedron,http://dbpedia.org/ontology/Population,http://dbpedia.org/ontology/PublicService,http://dbpedia.org/ontology/Relationship,http://dbpedia.org/ontology/RouteStop,http://dbpedia.org/ontology/Species,http://dbpedia.org/ontology/SportCompetitionResult,http://dbpedia.org/ontology/SportsSeason,http://dbpedia.org/ontology/Statistic,http://dbpedia.org/ontology/TimePeriod,http://dbpedia.org/ontology/TopicalConcept,http://dbpedia.org/ontology/UnitOfWork,http://dbpedia.org/ontology/Unknown,http://dbpedia.org/ontology/Work MISC
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/DBpedia2DUL.map:
--------------------------------------------------------------------------------
1 | dbo_Person,http://dbpedia.org/ontology/Person http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person
2 | http://dbpedia.org/ontology/Place http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Place
3 | dbo_Organisation,http://dbpedia.org/ontology/Organisation http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Organization
4 | http://dbpedia.org/ontology/Activity,http://dbpedia.org/ontology/Altitude,http://dbpedia.org/ontology/AnatomicalStructure,http://dbpedia.org/ontology/Area,http://dbpedia.org/ontology/Award,http://dbpedia.org/ontology/Biomolecule,http://dbpedia.org/ontology/Blazon,http://dbpedia.org/ontology/ChartsPlacements,http://dbpedia.org/ontology/ChemicalSubstance,http://dbpedia.org/ontology/Colour,http://dbpedia.org/ontology/Currency,http://dbpedia.org/ontology/Demographics,http://dbpedia.org/ontology/Depth,http://dbpedia.org/ontology/Device,http://dbpedia.org/ontology/Diploma,http://dbpedia.org/ontology/Disease,http://dbpedia.org/ontology/ElectionDiagram,http://dbpedia.org/ontology/EthnicGroup,http://dbpedia.org/ontology/Event,http://dbpedia.org/ontology/Flag,http://dbpedia.org/ontology/Food,http://dbpedia.org/ontology/GeneLocation,http://dbpedia.org/ontology/GrossDomesticProduct,http://dbpedia.org/ontology/GrossDomesticProductPerCapita,http://dbpedia.org/ontology/Holiday,http://dbpedia.org/ontology/HumanDevelopmentIndex,http://dbpedia.org/ontology/Language,http://dbpedia.org/ontology/List,http://dbpedia.org/ontology/MeanOfTransportation,http://dbpedia.org/ontology/Media,http://dbpedia.org/ontology/Medicine,http://dbpedia.org/ontology/Name,http://dbpedia.org/ontology/NaturalEvent,http://dbpedia.org/ontology/PenaltyShootOut,http://dbpedia.org/ontology/PersonFunction,http://dbpedia.org/ontology/Polyhedron,http://dbpedia.org/ontology/Population,http://dbpedia.org/ontology/PublicService,http://dbpedia.org/ontology/Relationship,http://dbpedia.org/ontology/RouteStop,http://dbpedia.org/ontology/Species,http://dbpedia.org/ontology/SportCompetitionResult,http://dbpedia.org/ontology/SportsSeason,http://dbpedia.org/ontology/Statistic,http://dbpedia.org/ontology/TimePeriod,http://dbpedia.org/ontology/TopicalConcept,http://dbpedia.org/ontology/UnitOfWork,http://dbpedia.org/ontology/Unknown,http://dbpedia.org/ontology/Work http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Role
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/DBpedia2NEEL.map:
--------------------------------------------------------------------------------
1 | http://dbpedia.org/ontology/SpaceStation,http://dbpedia.org/ontology/Species,http://dbpedia.org/ontology/Award,http://dbpedia.org/ontology/Activity,http://dbpedia.org/ontology/Language,http://dbpedia.org/ontology/EthnicGroup,http://dbpedia.org/ontology/Disease,http://dbpedia.org/ontology/CelestialBody Thing
2 | http://dbpedia.org/ontology/Holiday,http://dbpedia.org/ontology/Event Event
3 | http://dbpedia.org/ontology/FictionalCharacter Character
4 | http://dbpedia.org/ontology/Place,http://dbpedia.org/ontology/ArchitecturalStructure,http://dbpedia.org/ontology/Cemetery,http://dbpedia.org/ontology/ConcentrationCamp,http://dbpedia.org/ontology/CountrySeat,http://dbpedia.org/ontology/Garden,http://dbpedia.org/ontology/HistoricPlace,http://dbpedia.org/ontology/Mine,http://dbpedia.org/ontology/Monument,http://dbpedia.org/ontology/NaturalPlace,http://dbpedia.org/ontology/Park,http://dbpedia.org/ontology/PopulatedPlace,http://dbpedia.org/ontology/ProtectedArea,http://dbpedia.org/ontology/SiteOfSpecialScientificInterest,http://dbpedia.org/ontology/WineRegion,http://dbpedia.org/ontology/WorldHeritageSite Location
5 | http://dbpedia.org/ontology/Organisation,http://dbpedia.org/ontology/Family Organization
6 | http://dbpedia.org/ontology/Person,http://dbpedia.org/ontology/Ambassador,http://dbpedia.org/ontology/Archeologist,http://dbpedia.org/ontology/Architect,http://dbpedia.org/ontology/Aristocrat,http://dbpedia.org/ontology/Artist,http://dbpedia.org/ontology/Astronaut,http://dbpedia.org/ontology/Athlete,http://dbpedia.org/ontology/BeautyQueen,http://dbpedia.org/ontology/BusinessPerson,http://dbpedia.org/ontology/Celebrity,http://dbpedia.org/ontology/Chef,http://dbpedia.org/ontology/Cleric,http://dbpedia.org/ontology/Coach,http://dbpedia.org/ontology/Criminal,http://dbpedia.org/ontology/Economist,http://dbpedia.org/ontology/Egyptologist,http://dbpedia.org/ontology/Engineer,http://dbpedia.org/ontology/Farmer,http://dbpedia.org/ontology/HorseTrainer,http://dbpedia.org/ontology/Journalist,http://dbpedia.org/ontology/Judge,http://dbpedia.org/ontology/Lawyer,http://dbpedia.org/ontology/Linguist,http://dbpedia.org/ontology/MemberResistanceMovement,http://dbpedia.org/ontology/MilitaryPerson,http://dbpedia.org/ontology/Model,http://dbpedia.org/ontology/Monarch,http://dbpedia.org/ontology/MovieDirector,http://dbpedia.org/ontology/Noble,http://dbpedia.org/ontology/OfficeHolder,http://dbpedia.org/ontology/OrganisationMember,http://dbpedia.org/ontology/Orphan,http://dbpedia.org/ontology/Philosopher,http://dbpedia.org/ontology/PlayboyPlaymate,http://dbpedia.org/ontology/Politician,http://dbpedia.org/ontology/PoliticianSpouse,http://dbpedia.org/ontology/Presenter,http://dbpedia.org/ontology/Producer,http://dbpedia.org/ontology/Psychologist,http://dbpedia.org/ontology/Referee,http://dbpedia.org/ontology/Religious,http://dbpedia.org/ontology/RomanEmperor,http://dbpedia.org/ontology/Royalty,http://dbpedia.org/ontology/Scientist,http://dbpedia.org/ontology/SportsManager,http://dbpedia.org/ontology/TelevisionDirector,http://dbpedia.org/ontology/TelevisionPersonality,http://dbpedia.org/ontology/TheatreDirector,http://dbpedia.org/ontology/Writer Person
7 | http://dbpedia.org/ontology/Work,http://dbpedia.org/ontology/MeanOfTransportation,http://dbpedia.org/ontology/ProgrammingLanguage Product
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/mappings/MUC2CoNLL.map:
--------------------------------------------------------------------------------
1 | PERSON PER
2 | ORGANIZATION ORG
3 | LOCATION LOC
4 | DATE MISC
5 | TIME MISC
6 | MONEY MISC
7 | PERCENT MISC
8 | MISC MISC
--------------------------------------------------------------------------------
/adel-recognition/src/main/resources/stanford-full-en.properties:
--------------------------------------------------------------------------------
1 | annotators=tokenize,ssplit,pos,lemma,ner,parse,coref
2 | threads=8
3 | coref.algorithm=neural
4 | ner.applyFineGrained=false
5 | ner.applyNumericClassifiers=false
6 | ner.useSUTime=false
--------------------------------------------------------------------------------
/adel-recognition/src/test/java/fr/eurecom/adel/recognition/implementation/repositories/typeoverlapresolution/majorityvoting/MajorityVotingTypeOverlapResolutionTest.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.recognition.implementation.repositories.typeoverlapresolution.majorityvoting;
2 |
3 | import org.junit.jupiter.api.Assertions;
4 | import org.junit.jupiter.api.Test;
5 |
6 | import java.io.IOException;
7 | import java.util.ArrayList;
8 | import java.util.Arrays;
9 | import java.util.List;
10 |
11 | import fr.eurecom.adel.recognition.configuration.TypeOverlappingConfig;
12 | import fr.eurecom.adel.commons.datatypes.Document;
13 | import fr.eurecom.adel.commons.datatypes.Entity;
14 | import fr.eurecom.adel.recognition.domain.repositories.TypeOverlapResolutionRepository;
15 | import fr.eurecom.adel.recognition.exceptions.MappingNotExistsException;
16 | import fr.eurecom.adel.recognition.exceptions.TypeNotExistsException;
17 |
18 | /**
19 |  * Unit tests for {@link MajorityVotingTypeOverlapResolution}.
20 |  *
21 |  * <p>Each test builds a document holding a single entity whose raw type string joins several
22 |  * candidate types with {@code ||}. Judging from the fixtures, a candidate looks like
23 |  * {@code <type>from<scheme>--<annotator>} (NOTE(review): confirm against the resolver).
24 |  *
25 |  * @author Julien Plu on 2018-12-18.
26 |  */
27 | class MajorityVotingTypeOverlapResolutionTest {
28 |   /**
29 |    * Two of the three candidates are person types, so the resolved CoNLL type must be {@code PER}.
30 |    */
31 |   @Test
32 |   final void resolveTypeOverlappingHappyMajority() throws MappingNotExistsException, TypeNotExistsException {
33 |     final TypeOverlappingConfig config = configFor("CoNLL", "ann1", "ann2", "ann3");
34 |     final Document document = singleEntityDocument("PERfromCoNLL--ann1||dbo_PersonfromDBpedia--ann2||dbo_PlacefromDBpedia--ann3");
35 |     final TypeOverlapResolutionRepository voting = new MajorityVotingTypeOverlapResolution();
36 |
37 |     voting.resolveTypeOverlapping(config, document.getEntities());
38 |
39 |     Assertions.assertEquals("PER", document.getEntities().get(0).getType(), "Must be equals");
40 |   }
41 |
42 |   /**
43 |    * One candidate per annotator; with {@code ann1} first in the priority list the test expects {@code PER}.
44 |    */
45 |   @Test
46 |   final void resolveTypeOverlappingHappyDrawPerson() throws MappingNotExistsException, TypeNotExistsException {
47 |     final TypeOverlappingConfig config = configFor("CoNLL", "ann1", "ann3");
48 |     final Document document = singleEntityDocument("PERfromCoNLL--ann1||dbo_PlacefromDBpedia--ann3");
49 |     final TypeOverlapResolutionRepository voting = new MajorityVotingTypeOverlapResolution();
50 |
51 |     voting.resolveTypeOverlapping(config, document.getEntities());
52 |
53 |     Assertions.assertEquals("PER", document.getEntities().get(0).getType(), "Must be equals");
54 |   }
55 |
56 |   /**
57 |    * Same candidates as the person draw, but {@code ann3} comes first in the priority list, so {@code LOC} is expected.
58 |    */
59 |   @Test
60 |   final void resolveTypeOverlappingHappyDrawPlace() throws MappingNotExistsException, TypeNotExistsException {
61 |     final TypeOverlappingConfig config = configFor("CoNLL", "ann3", "ann1");
62 |     final Document document = singleEntityDocument("PERfromCoNLL--ann1||dbo_PlacefromDBpedia--ann3");
63 |     final TypeOverlapResolutionRepository voting = new MajorityVotingTypeOverlapResolution();
64 |
65 |     voting.resolveTypeOverlapping(config, document.getEntities());
66 |
67 |     Assertions.assertEquals("LOC", document.getEntities().get(0).getType(), "Must be equals");
68 |   }
69 |
70 |   /**
71 |    * The first candidate names the scheme {@code CNL}; the test expects a {@link MappingNotExistsException}.
72 |    */
73 |   @Test
74 |   final void resolveTypeOverlappingUnHappyWrongMapping() {
75 |     final TypeOverlappingConfig config = configFor("CoNLL", "ann3", "ann1");
76 |     final Document document = singleEntityDocument("PERfromCNL--ann1||dbo_PlacefromDBpedia--ann3");
77 |     final TypeOverlapResolutionRepository voting = new MajorityVotingTypeOverlapResolution();
78 |
79 |     Assertions.assertThrows(MappingNotExistsException.class, () -> voting.resolveTypeOverlapping(config, document.getEntities()));
80 |   }
81 |
82 |   /**
83 |    * The first candidate carries the type {@code PEOPEO} and the target is {@code MUC}; the test expects a {@link TypeNotExistsException}.
84 |    */
85 |   @Test
86 |   final void resolveTypeOverlappingUnHappyWrongType() {
87 |     final TypeOverlappingConfig config = configFor("MUC", "ann3", "ann1");
88 |     final Document document = singleEntityDocument("PEOPEOfromCoNLL--ann1||dbo_PlacefromDBpedia--ann3");
89 |     final TypeOverlapResolutionRepository voting = new MajorityVotingTypeOverlapResolution();
90 |
91 |     Assertions.assertThrows(TypeNotExistsException.class, () -> voting.resolveTypeOverlapping(config, document.getEntities()));
92 |   }
93 |
94 |   /**
95 |    * Builds the resolution configuration shared by the tests.
96 |    *
97 |    * @param targetScheme value handed to {@code setTo}
98 |    * @param priority annotator names, in the order handed to {@code setPriority}
99 |    * @return the populated configuration
100 |    */
101 |   private static TypeOverlappingConfig configFor(final String targetScheme, final String... priority) {
102 |     final TypeOverlappingConfig config = new TypeOverlappingConfig();
103 |
104 |     config.setTo(targetScheme);
105 |     config.setPriority(Arrays.asList(priority));
106 |
107 |     return config;
108 |   }
109 |
110 |   /**
111 |    * Builds a document containing exactly one entity with the given raw type string.
112 |    *
113 |    * @param rawType the unresolved type string assigned to the entity
114 |    * @return a document whose entity list holds that single entity
115 |    */
116 |   private static Document singleEntityDocument(final String rawType) {
117 |     // Typed list instead of the raw List used previously; still an ArrayList as in the original fixtures.
118 |     final List<Entity> entities = new ArrayList<>();
119 |
120 |     entities.add(Entity.builder().type(rawType).build());
121 |
122 |     return Document.builder().entities(entities).build();
123 |   }
124 | }
--------------------------------------------------------------------------------
/adel-shell/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | adel-shell
6 | ${revision}
7 | adel-shell
8 | http://adel.eurecom.fr
9 |
10 |
11 | fr.eurecom.adel
12 | adel-pom
13 | ${revision}
14 |
15 |
16 |
17 |
18 | org.springframework.cloud
19 | spring-cloud-starter-config
20 | ${spring.cloud.starter.config.version}
21 |
22 |
23 |
24 | fr.eurecom.adel
25 | adel-recognition
26 | ${revision}
27 |
28 |
29 |
30 | org.springframework.shell
31 | spring-shell-starter
32 | ${spring.shell.starter.version}
33 |
34 |
35 |
36 |
37 |
38 |
39 | org.springframework.boot
40 | spring-boot-maven-plugin
41 |
42 |
43 | org.apache.maven.plugins
44 | maven-compiler-plugin
45 |
46 | 13
47 | 13
48 |
49 |
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/adel-shell/src/main/java/fr/eurecom/adel/shell/ADELShellApplication.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.shell;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | import org.springframework.context.annotation.ComponentScan;
6 |
7 | /**
8 |  * Entry point of the ADEL shell application.
9 |  *
10 |  * <p>Components are scanned starting from the {@code fr.eurecom.adel} base package.
11 |  */
12 | @SpringBootApplication
13 | @ComponentScan(basePackages = "fr.eurecom.adel")
14 | public class ADELShellApplication {
15 |   /**
16 |    * Boots the Spring application.
17 |    *
18 |    * @param args command-line arguments forwarded to Spring Boot
19 |    */
20 |   public static void main(final String... args) {
21 |     final SpringApplication application = new SpringApplication(ADELShellApplication.class);
22 |
23 |     application.run(args);
24 |   }
25 | }
26 |
--------------------------------------------------------------------------------
/adel-shell/src/main/java/fr/eurecom/adel/shell/setting/SimplePromptProvider.java:
--------------------------------------------------------------------------------
1 | package fr.eurecom.adel.shell.setting;
2 |
3 | import org.jline.utils.AttributedString;
4 | import org.jline.utils.AttributedStyle;
5 | import org.springframework.shell.jline.PromptProvider;
6 | import org.springframework.stereotype.Component;
7 |
8 | /**
9 |  * Supplies the shell prompt: the text {@code adel-shell:>} with a yellow foreground.
10 |  */
11 | @Component
12 | public class SimplePromptProvider implements PromptProvider {
13 |   /**
14 |    * @return the styled prompt string
15 |    */
16 |   @Override
17 |   public AttributedString getPrompt() {
18 |     final AttributedStyle yellow = AttributedStyle.DEFAULT.foreground(AttributedStyle.YELLOW);
19 |
20 |     return new AttributedString("adel-shell:>", yellow);
21 |   }
22 | }
23 |
--------------------------------------------------------------------------------
/adel-shell/src/main/resources/bootstrap.yaml:
--------------------------------------------------------------------------------
1 | spring:
2 |   cloud:
3 |     config:
4 |       uri: ${CONFIG_URI:http://localhost:8888}
5 |   application:
6 |     name: adel
7 |   main:
8 |     web-application-type: none
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 | fr.eurecom.adel
5 | adel-pom
6 | pom
7 | ${revision}
8 | ADEL
9 | http://adel.eurecom.fr
10 |
11 |
12 | 2.0.0
13 | UTF-8
14 | UTF-8
15 | 13
16 | 1.10.0
17 | 0.31.0
18 | 3.0.0-M3
19 | 2.2.1.RELEASE
20 | 2.2.1.RELEASE
21 | 2.2.1.RELEASE
22 | 2.2.1.RELEASE
23 | 2.2.1.RELEASE
24 | 2.2.1.RELEASE
25 | 2.2.1
26 | 2.2.1
27 | 1.2.4.RELEASE
28 | 2.0.0-RC.7
29 | 1.18.8
30 | 2.9.2
31 | 2.9.2
32 | 2.10.1
33 | 2.4.0-b180830.0359
34 | 2.3.0.1
35 | 2.4.0-b180830.0438
36 | 1.1.1
37 | 2.0.1.Final
38 | 0.9.12
39 | 6.1.0.Final
40 | 3.0.1
41 | 5.1.1
42 | 1.2
43 | 0.3.2
44 | 3.13.1
45 | 20190722
46 | 5.5.2
47 | 5.5.2
48 | 1.5.2
49 | 1.5.2
50 | 3.9.2
51 | 28.2-jre
52 | 1.12.1
53 | 2.0.1.RELEASE
54 | 0.6.0
55 |
56 |
57 |
58 | org.springframework.boot
59 | spring-boot-starter-parent
60 | 2.2.2.RELEASE
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 | io.fabric8
69 | docker-maven-plugin
70 | ${docker.maven.plugin.version}
71 |
72 |
73 |
74 |
75 |
76 | org.apache.maven.plugins
77 | maven-compiler-plugin
78 |
79 | 13
80 | 13
81 |
82 |
83 |
84 |
85 |
86 |
107 |
108 |
109 | adel-commons
110 | adel-recognition
111 | adel-api
112 | adel-shell
113 | adel-linking
114 | adel-indexing
115 | adel-discovery-server
116 | adel-config-server
117 | adel-admin-server
118 | adel-api-gateway
119 | adel-hystrix-dashboard
120 |
121 |
122 |
--------------------------------------------------------------------------------