├── README.md ├── pom.xml └── src └── main └── java └── org └── iptc └── extra └── core ├── daos ├── CorporaDAO.java ├── DAO.java ├── DictionariesDAO.java ├── GroupDAO.java ├── RulesDAO.java ├── SchemasDAO.java ├── TaxonomiesDAO.java └── TopicsDAO.java ├── eql ├── EQLMapper.java ├── EQLParser.java ├── antlr │ ├── Eql.tokens │ ├── EqlBaseListener.java │ ├── EqlBaseVisitor.java │ ├── EqlLexer.java │ ├── EqlLexer.tokens │ ├── EqlListener.java │ ├── EqlParser.java │ └── EqlVisitor.java └── tree │ ├── SyntaxError.java │ ├── SyntaxTree.java │ ├── extra │ ├── EQLOperator.java │ └── EQLRelation.java │ ├── nodes │ ├── Clause.java │ ├── CommentClause.java │ ├── ErrorMessageNode.java │ ├── Index.java │ ├── Modified.java │ ├── Modifier.java │ ├── Node.java │ ├── Operator.java │ ├── PrefixClause.java │ ├── ReferenceClause.java │ ├── Relation.java │ ├── SearchClause.java │ └── SearchTerm.java │ ├── utils │ └── TreeUtils.java │ └── visitor │ ├── EQL2ESQueryVisitor.java │ ├── EQL2HTMLVisitor.java │ ├── EQL2HighlightVisitor.java │ ├── EQL2JSTreeVisitor.java │ ├── EQLPretifierVisitor.java │ ├── EQLValidator.java │ ├── ReferenceClausesVisitor.java │ └── SyntaxTreeVisitor.java ├── es ├── ElasticSearchClient.java ├── ElasticSearchResponse.java └── ElasticSearchUtils.java ├── types ├── Corpus.java ├── Dictionary.java ├── Group.java ├── Rule.java ├── Schema.java ├── Taxonomy.java ├── Topic.java └── document │ ├── Document.java │ ├── DocumentField.java │ ├── DocumentTopic.java │ ├── NestedField.java │ ├── Paragraph.java │ ├── Sentence.java │ ├── StructuredTextField.java │ └── TextField.java └── utils └── TextUtils.java /README.md: -------------------------------------------------------------------------------- 1 | # extra-core 2 | 3 | This repository contains the core implementation of the IPTC EXTRA project. 
4 | 5 | ## Getting Started 6 | 7 | Extra-core is a Java project, configured as a Maven project to handle dependencies, and consists of four main packages: 8 | 9 | * **org.iptc.extra.core.types:** contains the classes that represent the core types used in EXTRA, like Rules, Schemas, Documents, etc. 10 | * **org.iptc.extra.core.daos:** contains a set of classes to support CRUD operations on mongodb for the objects of EXTRA. 11 | * **org.iptc.extra.core.es:** contains a client for accessing Elastic Search, to support indexing of documents and rules, retrieval, etc. 12 | * **org.iptc.extra.core.eql:** is the main package that contains a set of classes for the parsing, processing and transformation of EQL rules. 13 | 14 | ## Extra Query Language 15 | EQL, the EXTRA Query Language, is a formal language for representing rules, used for retrieval and tagging of documents. EQL queries are intended to be human readable and writable, intuitive, and expressive. This implementation can parse EQL syntax and translate it to the ElasticSearch query language. 16 | 17 | ### Search Clauses 18 | 19 | The core building block of EQL is a search clause, which consists of an **Index**, a **Relation** and a **SearchTerm**. A SearchTerm MUST be enclosed in double quotes if it contains any of the following characters: < > = / ( ) and whitespace. Index and relation are optional; when they are omitted, any field is implied: 20 | 21 | **searchClause = (index relation)? searchTerm** 22 | 23 | For example: 24 | 25 | - title adj "civil liberties" 26 | - body any "cannabis cocaine crack drug drugs heroin marijuana meth pot narcotic narcotics" 27 | - "religious freedom" 28 | 29 | Modifiers can be applied on relations to adapt their meaning. 30 | 31 | **relation = relationName (/modifierName comparator value)* ** 32 | 33 | For example: 34 | 35 | - *title adj/stemming "civil liberties"* 36 | 37 | ### Boolean Operators 38 | Search clauses can be combined with n-ary boolean operators in a prefix way. 
The set of supported operators is or, and, not and prox. Both lower and upper case are valid. 39 | 40 | The basic way to combine search clauses is: 41 | 42 | **booleanClause = (booleanOperator searchClause+)** 43 | 44 | Boolean operators can also be used to combine other boolean clauses, or boolean clauses with search clauses. 45 | 46 | **booleanClause = (booleanOperator (searchClause | booleanClause)+)** 47 | 48 | Boolean operators can be modified in a similar way as relations: 49 | 50 | **booleanOperator = booleanName (/modifierName comparator value)* ** 51 | 52 | For example: 53 | 54 | - prox/unit=word/distance>4 55 | - prox/unit=sentence/distance<2 56 | - prox/unit=paragraph/distance<=1 57 | - or/count>4 58 | 59 | ## Usage 60 | 61 | ### Get the artifacts 62 | 63 | To use the extra-core project, add the following dependency to your `pom.xml`: 64 | 65 | ```xml 66 | <dependency> 67 | <groupId>org.iptc</groupId> 68 | <artifactId>extra-core</artifactId> 69 | <version>0.1.1-SNAPSHOT</version> 70 | </dependency> 71 | ``` 72 | 73 | ### Retrieval and Creation of Rules, Schemas, Taxonomies and Topics 74 | 75 | To support serialization/deserialization from/to MongoDB: 76 | 77 | ```java 78 | MongoClient mongoClient = new MongoClient("localhost", 27017); 79 | Morphia morphia = new Morphia(); 80 | Datastore datastore = morphia.createDatastore(mongoClient, database); 81 | 82 | RulesDAO rulesDAO = new RulesDAO(datastore); // initialize a rules dao 83 | 84 | Rule rule = rulesDAO.get("595a913da7b11b0001cae336"); // get rule with id 595a913da7b11b0001cae336 85 | String eqlQuery = rule.getQuery(); // get the EQL query of the rule 86 | 87 | SchemasDAO schemasDAO = new SchemasDAO(datastore); // initialize a schemas dao 88 | 89 | Schema schema = new Schema(); // create a new schema 90 | schema.setName("Test schema"); // set name of the new schema 91 | 92 | schema.addField("title", true, true, false); // add title field textual=true, hasSentences=true, hasParagraphs=false 93 | schema.addField("body", true, true, true); // add body field textual=true, hasSentences=true, 
hasParagraphs=true 94 | 95 | schemasDAO.save(schema); // save the new schema into mongodb 96 | 97 | ``` 98 | 99 | ### Parsing, processing and transformation of EQL queries 100 | 101 | To parse a string containing an EQL query use the parse method of EQLParser: 102 | 103 | ```java 104 | 105 | String eqlQuery = "...."; 106 | 107 | SyntaxTree tree = EQLParser.parse(eqlQuery); 108 | Node root = tree.getRootNode(); 109 | 110 | ``` 111 | 112 | To transform a syntax tree generated from an EQL query to an Elastic Search query, use EQLMapper: 113 | 114 | ```java 115 | QueryBuilder esQuery = new EQLMapper().toElasticSearchQuery(root, schema); 116 | ``` 117 | 118 | To perform a depth-first traversal of the syntax tree: 119 | ```java 120 | SyntaxTreeVisitor visitor = new SyntaxTreeVisitor(); 121 | visitor.visit(root); 122 | ``` 123 | 124 | To change that functionality, e.g. to perform an operation in each visited node of the syntax tree, extend `org.iptc.extra.core.eql.tree.visitor.SyntaxTreeVisitor`. 125 | 126 | For example the following class extends the default behavior of SyntaxTreeVisitor by aggregating the Index names across all Index nodes in the syntax tree: 127 | 128 | ```java 129 | public class MyVisitor extends SyntaxTreeVisitor<Set<String>> { 130 | 131 | // override visit method for Index nodes 132 | @Override 133 | public Set<String> visitIndex(Index index) { 134 | Set<String> set = new HashSet<String>(); 135 | set.add(index.getIndex()); return set; 136 | } 137 | 138 | protected Set<String> aggregateResult(Set<String> aggregate, Set<String> nextResult) { 139 | aggregate.addAll(nextResult); return aggregate; 140 | } 141 | 142 | protected Set<String> defaultResult() { 143 | return new HashSet<String>(); 144 | } 145 | } 146 | ``` 147 | 148 | Although extra-core can be used as a dependency in any Java project, it's recommended to use the [integrated framework](https://github.com/iptc/extra-ext) developed on top of extra-core. 
This framework includes a REST API for the management of rules, schemas, etc but also a web user interface for the development, testing and usage of rules. 149 | 150 | See the other repositories of the IPTC EXTRA project: 151 | * [extra-ext](https://github.com/iptc/extra-ext) User Interface, API and other extensions of the Core 152 | * [extra-examples](https://github.com/iptc/extra-examples) Examples developed for the EXTRA Project 153 | 154 | 155 | ## Authors 156 | * **[Manos Schinas](https://github.com/manosetro)** - manosetro@iti.gr 157 | * **[Akis Papadopoulos](https://github.com/kleinmind)** - papadop@iti.gr 158 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | 6 | org.iptc 7 | extra-core 8 | 0.1.1-SNAPSHOT 9 | jar 10 | 11 | extra-core 12 | https://github.com/iptc/extra-core 13 | Core implementation of the IPTC EXTRA project 14 | 15 | 16 | 17 | manosetro 18 | Manos Schinas 19 | manosetro@iti.gr 20 | 21 | 22 | 23 | 24 | 25 | The Apache Software License, Version 2.0 26 | http://www.apache.org/licenses/LICENSE-2.0.txt 27 | repo 28 | 29 | 30 | 31 | 32 | scm:git:git@github.com:iptc/extra-core.git 33 | scm:git:git@github.com:iptc/extra-core.git 34 | git@github.com:iptc/extra-core.git 35 | HEAD 36 | 37 | 38 | 39 | UTF-8 40 | 41 | 42 | 43 | 44 | junit 45 | junit 46 | 3.8.1 47 | test 48 | 49 | 50 | org.mongodb.morphia 51 | morphia 52 | 1.3.2 53 | 54 | 55 | org.elasticsearch.client 56 | rest 57 | 5.2.0 58 | 59 | 60 | com.google.code.gson 61 | gson 62 | 2.8.0 63 | 64 | 65 | org.apache.commons 66 | commons-lang3 67 | 3.5 68 | 69 | 70 | org.elasticsearch.client 71 | transport 72 | 5.2.0 73 | 74 | 75 | org.apache.logging.log4j 76 | log4j-api 77 | 2.7 78 | 79 | 80 | org.apache.logging.log4j 81 | log4j-core 82 | 2.7 83 | 84 | 85 | 86 | org.antlr 87 | antlr4-runtime 88 | 4.5.3 89 | 90 | 91 | 92 | edu.stanford.nlp 93 | 
stanford-corenlp 94 | 3.7.0 95 | 96 | 97 | org.apache.lucene 98 | lucene-queryparser 99 | 100 | 101 | org.apache.lucene 102 | lucene-queries 103 | 104 | 105 | org.apache.lucene 106 | lucene-core 107 | 108 | 109 | org.apache.lucene 110 | lucene-analyzers-common 111 | 112 | 113 | 114 | 115 | 116 | edu.stanford.nlp 117 | stanford-corenlp 118 | 3.7.0 119 | models 120 | 121 | 122 | org.apache.lucene 123 | lucene-queryparser 124 | 125 | 126 | org.apache.lucene 127 | lucene-queries 128 | 129 | 130 | org.apache.lucene 131 | lucene-core 132 | 133 | 134 | org.apache.lucene 135 | lucene-analyzers-common 136 | 137 | 138 | 139 | 140 | 141 | commons-io 142 | commons-io 143 | 2.5 144 | 145 | 146 | org.jsoup 147 | jsoup 148 | 1.10.3 149 | 150 | 151 | 152 | 160 | 161 | org.sonatype.oss 162 | oss-parent 163 | 7 164 | 165 | 166 | 167 | 168 | 169 | 170 | org.apache.maven.plugins 171 | maven-release-plugin 172 | 2.5 173 | 174 | true 175 | false 176 | release 177 | deploy 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | org.sonatype.plugins 186 | nexus-staging-maven-plugin 187 | 1.6.7 188 | true 189 | 190 | ossrh 191 | https://oss.sonatype.org/ 192 | true 193 | 194 | 195 | 196 | org.apache.maven.plugins 197 | maven-compiler-plugin 198 | 2.5.1 199 | 200 | 1.8 201 | 1.8 202 | 203 | 204 | 205 | org.apache.maven.plugins 206 | maven-source-plugin 207 | 2.2.1 208 | 209 | 210 | attach-sources 211 | 212 | jar-no-fork 213 | 214 | 215 | 216 | 217 | 218 | org.apache.maven.plugins 219 | maven-javadoc-plugin 220 | 2.9.1 221 | 222 | -Xdoclint:none 223 | 224 | 225 | 226 | attach-javadocs 227 | 228 | jar 229 | 230 | 231 | 232 | 233 | 234 | org.apache.maven.plugins 235 | maven-gpg-plugin 236 | 1.5 237 | 238 | 239 | sign-artifacts 240 | verify 241 | 242 | sign 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/daos/CorporaDAO.java: 
-------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.daos; 2 | 3 | import org.iptc.extra.core.types.Corpus; 4 | import org.mongodb.morphia.Datastore; 5 | 6 | public class CorporaDAO extends DAO { 7 | public CorporaDAO(Datastore ds) { 8 | super(ds); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/daos/DAO.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.daos; 2 | 3 | import org.bson.types.ObjectId; 4 | import org.mongodb.morphia.Datastore; 5 | import org.mongodb.morphia.dao.BasicDAO; 6 | import org.mongodb.morphia.query.Query; 7 | 8 | import com.mongodb.WriteResult; 9 | 10 | /** 11 | * 12 | * @author manosetro 13 | * 14 | * @param 15 | * 16 | * Data Access Object class, used to access objects of type K for/to MongoDB. 17 | * 18 | */ 19 | public class DAO extends BasicDAO { 20 | 21 | protected DAO(Datastore ds) { 22 | super(ds); 23 | } 24 | 25 | public K get(String id) { 26 | try { 27 | ObjectId oId = new ObjectId(id); 28 | return this.get(oId); 29 | } 30 | catch(Exception e) { 31 | return null; 32 | } 33 | } 34 | 35 | public WriteResult deleteById(String id) { 36 | ObjectId oId = new ObjectId(id); 37 | return this.deleteById(oId); 38 | } 39 | 40 | public boolean exists(String id) { 41 | Query query = createQuery().filter("_id", new ObjectId(id)); 42 | return this.exists(query); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/daos/DictionariesDAO.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.daos; 2 | 3 | import org.iptc.extra.core.types.Dictionary; 4 | import org.mongodb.morphia.Datastore; 5 | 6 | public class DictionariesDAO extends DAO { 7 | public DictionariesDAO(Datastore ds) { 8 | super(ds); 
9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/daos/GroupDAO.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.daos; 2 | 3 | import org.iptc.extra.core.types.Group; 4 | import org.mongodb.morphia.Datastore; 5 | 6 | public class GroupDAO extends DAO { 7 | 8 | public GroupDAO(Datastore ds) { 9 | super(ds); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/daos/RulesDAO.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.daos; 2 | 3 | import org.iptc.extra.core.types.Rule; 4 | import org.mongodb.morphia.Datastore; 5 | 6 | public class RulesDAO extends DAO { 7 | 8 | public RulesDAO(Datastore ds) { 9 | super(ds); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/daos/SchemasDAO.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.daos; 2 | 3 | import org.iptc.extra.core.types.Schema; 4 | import org.mongodb.morphia.Datastore; 5 | 6 | public class SchemasDAO extends DAO { 7 | 8 | public SchemasDAO(Datastore ds) { 9 | super(ds); 10 | } 11 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/daos/TaxonomiesDAO.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.daos; 2 | 3 | import org.iptc.extra.core.types.Taxonomy; 4 | import org.mongodb.morphia.Datastore; 5 | 6 | public class TaxonomiesDAO extends DAO { 7 | public TaxonomiesDAO(Datastore ds) { 8 | super(ds); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- 
/src/main/java/org/iptc/extra/core/daos/TopicsDAO.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.daos; 2 | 3 | import org.iptc.extra.core.types.Topic; 4 | import org.mongodb.morphia.Datastore; 5 | import org.mongodb.morphia.query.Query; 6 | 7 | import com.mongodb.WriteResult; 8 | 9 | public class TopicsDAO extends DAO { 10 | 11 | public TopicsDAO(Datastore ds) { 12 | super(ds); 13 | } 14 | 15 | public Topic get(String topicId, String taxonomyId) { 16 | Query q = createQuery() 17 | .filter("topicId", topicId) 18 | .filter("taxonomyId", taxonomyId); 19 | 20 | Topic topic = this.findOne(q); 21 | return topic; 22 | } 23 | 24 | public WriteResult delete(String topicId, String taxonomyId) { 25 | Query q = createQuery() 26 | .filter("topicId", topicId) 27 | .filter("taxonomyId", taxonomyId); 28 | 29 | return this.deleteByQuery(q); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/EQLMapper.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql; 2 | 3 | import org.elasticsearch.index.query.QueryBuilder; 4 | import org.iptc.extra.core.eql.tree.nodes.Node; 5 | import org.iptc.extra.core.eql.tree.visitor.EQL2ESQueryVisitor; 6 | import org.iptc.extra.core.eql.tree.visitor.EQL2HTMLVisitor; 7 | import org.iptc.extra.core.eql.tree.visitor.EQL2HighlightVisitor; 8 | import org.iptc.extra.core.eql.tree.visitor.EQL2JSTreeVisitor; 9 | import org.iptc.extra.core.eql.tree.visitor.EQLPretifierVisitor; 10 | import org.iptc.extra.core.types.Schema; 11 | 12 | /** 13 | * @author manosetro - Manos Schinas 14 | * 15 | * A wrapper class that exposes several transformation of EXTRA rules. 
16 | * 17 | */ 18 | public class EQLMapper { 19 | 20 | /* 21 | * Transforms a rule expressed in EQL to elastic search query 22 | */ 23 | public QueryBuilder toElasticSearchQuery(Node root, Schema schema) { 24 | if(root == null) { 25 | return null; 26 | } 27 | 28 | EQL2ESQueryVisitor visitor = new EQL2ESQueryVisitor(schema); 29 | QueryBuilder qb = visitor.visit(root); 30 | 31 | return qb; 32 | } 33 | 34 | /* 35 | * Transforms a rule expressed in EQL to elastic search highlight query 36 | * That's a relaxed version of the ES query. 37 | */ 38 | public QueryBuilder toElasticSearchHighlight(Node root, Schema schema) { 39 | if(root == null) { 40 | return null; 41 | } 42 | 43 | EQL2HighlightVisitor visitor = new EQL2HighlightVisitor(schema); 44 | QueryBuilder qb = visitor.visit(root); 45 | 46 | return qb; 47 | } 48 | 49 | /* 50 | * Transforms an EQL rule to HTML 51 | */ 52 | public String toHtml(Node root, String htmlTag) { 53 | EQL2HTMLVisitor visitor = new EQL2HTMLVisitor(htmlTag); 54 | String html = visitor.visit(root); 55 | 56 | return html; 57 | } 58 | 59 | public String toString(Node root, String newline, String tab) { 60 | if(root == null) { 61 | return null; 62 | } 63 | 64 | EQLPretifierVisitor visitor = new EQLPretifierVisitor(newline, tab); 65 | return visitor.visit(root); 66 | } 67 | 68 | /* 69 | * Transforms an EQL rule to jstree (https://www.jstree.com/) 70 | */ 71 | public String toJSTree(Node root) { 72 | EQL2JSTreeVisitor visitor = new EQL2JSTreeVisitor(); 73 | String tree = visitor.visit(root); 74 | 75 | return "

    " + tree + "
"; 76 | } 77 | 78 | 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/EQLParser.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.antlr.v4.runtime.ANTLRInputStream; 7 | import org.antlr.v4.runtime.BaseErrorListener; 8 | import org.antlr.v4.runtime.CharStream; 9 | import org.antlr.v4.runtime.CommonTokenStream; 10 | import org.antlr.v4.runtime.RecognitionException; 11 | import org.antlr.v4.runtime.Recognizer; 12 | import org.antlr.v4.runtime.Token; 13 | import org.antlr.v4.runtime.tree.ErrorNode; 14 | import org.antlr.v4.runtime.tree.ParseTree; 15 | import org.apache.commons.lang3.StringUtils; 16 | import org.iptc.extra.core.eql.antlr.EqlBaseVisitor; 17 | import org.iptc.extra.core.eql.antlr.EqlLexer; 18 | import org.iptc.extra.core.eql.antlr.EqlParser; 19 | import org.iptc.extra.core.eql.antlr.EqlParser.ModifierContext; 20 | import org.iptc.extra.core.eql.antlr.EqlParser.StatementContext; 21 | import org.iptc.extra.core.eql.tree.SyntaxError; 22 | import org.iptc.extra.core.eql.tree.SyntaxTree; 23 | import org.iptc.extra.core.eql.tree.extra.EQLOperator; 24 | import org.iptc.extra.core.eql.tree.extra.EQLRelation; 25 | import org.iptc.extra.core.eql.tree.nodes.Clause; 26 | import org.iptc.extra.core.eql.tree.nodes.CommentClause; 27 | import org.iptc.extra.core.eql.tree.nodes.ErrorMessageNode; 28 | import org.iptc.extra.core.eql.tree.nodes.Index; 29 | import org.iptc.extra.core.eql.tree.nodes.Modifier; 30 | import org.iptc.extra.core.eql.tree.nodes.Node; 31 | import org.iptc.extra.core.eql.tree.nodes.Operator; 32 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 33 | import org.iptc.extra.core.eql.tree.nodes.ReferenceClause; 34 | import org.iptc.extra.core.eql.tree.nodes.Relation; 35 | import 
org.iptc.extra.core.eql.tree.nodes.SearchClause; 36 | import org.iptc.extra.core.eql.tree.nodes.SearchTerm; 37 | 38 | /** 39 | * @author manosetro - Manos Schinas 40 | * 41 | * Extra Query Language (EQL) parser built upon Antlr 42 | * 43 | * That class is used to parse a rule expressed as an EQL string. 44 | * Generates the corresponding syntax tree (org.iptc.extra.core.eql.tree.SyntaxTree). 45 | * 46 | * This class is a wrapper of the Antlr parser in the package org.iptc.extra.core.eql.antlr 47 | * 48 | * 49 | */ 50 | public class EQLParser { 51 | 52 | /* 53 | * Given a rule expressed as an EQL query, that method returns a syntax tree (org.iptc.extra.core.eql.SyntaxTree) 54 | * 55 | */ 56 | public static SyntaxTree parse(String eql) { 57 | 58 | CharStream input = new ANTLRInputStream(eql); 59 | EqlLexer lexer = new EqlLexer(input); 60 | CommonTokenStream tokens = new CommonTokenStream(lexer); 61 | 62 | EqlParser parser = new EqlParser(tokens); // Antlr parser used to parse EQL 63 | parser.setBuildParseTree(true); 64 | parser.removeErrorListeners(); 65 | 66 | // add custom error listeners 67 | SyntaxErrorsAggregator errorListener = new SyntaxErrorsAggregator(); 68 | parser.addErrorListener(errorListener); 69 | 70 | ParseTree tree = parser.prefixClause(); // get Antlr ParseTree 71 | 72 | SyntaxTree syntaxTree = new SyntaxTree(); 73 | syntaxTree.setErrors(errorListener.getErrors()); 74 | try { 75 | Node root = getRootNode(tree); 76 | syntaxTree.setRootNode(root); 77 | } 78 | catch(Exception e) { 79 | e.printStackTrace(); 80 | System.out.println(e.getMessage()); 81 | } 82 | 83 | return syntaxTree; 84 | } 85 | 86 | /* 87 | * transforms parse tree (org.antlr.v4.runtime.tree.ParseTree) returned by Antlr parser to 88 | * org.iptc.extra.core.eql.tree.Node 89 | */ 90 | private static Node getRootNode(ParseTree tree) { 91 | 92 | EqlBaseVisitor visitor = new EqlBaseVisitor() { 93 | 94 | int depth = 0; 95 | 96 | /* 97 | * Visits 
org.iptc.extra.core.eql.parsers.EqlParser.PrefixClauseContext and produces the 98 | * equivalent org.iptc.extra.core.eql.tree.nodes.PrefixClause 99 | */ 100 | @Override 101 | public Node visitPrefixClause(EqlParser.PrefixClauseContext ctx) { 102 | 103 | PrefixClause prefixClause = new PrefixClause(); 104 | 105 | prefixClause.setDepth(depth); 106 | depth++; 107 | 108 | for(ParseTree child : ctx.children) { 109 | if(child instanceof ErrorNode) { 110 | Node node = visit(child); 111 | if(node != null && node instanceof ErrorNode) { 112 | prefixClause.addError(node); 113 | } 114 | } 115 | } 116 | 117 | Node operator = visit(ctx.booleanOp()); 118 | operator.setParent(prefixClause); 119 | 120 | EQLOperator extraOperator = EQLOperator.getEQLOperator((Operator) operator); 121 | prefixClause.setEQLOperator(extraOperator); 122 | prefixClause.setOperator((Operator) operator); 123 | 124 | List clauses = new ArrayList(); 125 | for(StatementContext statement : ctx.statement()) { 126 | Node node = visit(statement); 127 | if(node == null) { 128 | continue; 129 | } 130 | node.setParent(prefixClause); 131 | 132 | if(node instanceof Clause) { 133 | clauses.add((Clause) node); 134 | } 135 | else if(node instanceof ErrorNode) { 136 | prefixClause.addError(node); 137 | } 138 | } 139 | prefixClause.setClauses(clauses); 140 | 141 | depth--; 142 | return prefixClause; 143 | } 144 | 145 | /* 146 | * Visits org.iptc.extra.core.eql.parsers.EqlParser.BooleanOpContext and produces the 147 | * equivalent org.iptc.extra.core.eql.tree.nodes.Operator 148 | */ 149 | @Override 150 | public Node visitBooleanOp(EqlParser.BooleanOpContext ctx) { 151 | 152 | String operatorName = null; 153 | if(ctx.OR() != null) { 154 | operatorName = ctx.OR().getText().toLowerCase(); 155 | } 156 | else if(ctx.AND() != null) { 157 | operatorName = ctx.AND().getText().toLowerCase(); 158 | } 159 | else if(ctx.NOT() != null) { 160 | operatorName = ctx.NOT().getText().toLowerCase(); 161 | } 162 | else if(ctx.PROX() != null) { 
163 | operatorName = ctx.PROX().getText().toLowerCase(); 164 | } 165 | 166 | Operator operator = new Operator(operatorName); 167 | operator.setDepth(depth); 168 | depth++; 169 | 170 | if(ctx.modifierList() != null) { 171 | List modifiers = new ArrayList(); 172 | for(ModifierContext modCtx : ctx.modifierList().modifier()) { 173 | Modifier modifier = new Modifier(modCtx.modifierName().getText()); 174 | if(modCtx.comparitorSymbol() != null) { 175 | modifier.setComparitor(modCtx.comparitorSymbol().getText()); 176 | } 177 | if(modCtx.modifierValue() != null) { 178 | modifier.setValue(modCtx.modifierValue().getText()); 179 | } 180 | modifiers.add(modifier); 181 | } 182 | 183 | if(!modifiers.isEmpty()) { 184 | operator.setModifiers(modifiers); 185 | } 186 | } 187 | 188 | depth--; 189 | 190 | boolean valid = EQLOperator.isValid(operator); 191 | operator.setValid(valid); 192 | 193 | return operator; 194 | } 195 | 196 | /* 197 | * Visits org.iptc.extra.core.eql.parsers.EqlParser.SearchClauseContext and produces the 198 | * equivalent org.iptc.extra.core.eql.tree.nodes.SearchClause 199 | */ 200 | @Override 201 | public Node visitSearchClause(EqlParser.SearchClauseContext ctx) { 202 | 203 | SearchClause searchClause = new SearchClause(); 204 | searchClause.setDepth(depth); 205 | depth++; 206 | 207 | Node searchTerms = visit(ctx.searchTerm()); 208 | searchTerms.setParent(searchClause); 209 | searchClause.setSearchTerm((SearchTerm) searchTerms); 210 | 211 | if(ctx.index() != null) { 212 | if(ctx.relation() != null) { 213 | Index index = new Index(ctx.index().getText()); 214 | index.setDepth(depth); 215 | index.setParent(searchClause); 216 | searchClause.setIndex(index); 217 | 218 | Node relation = visit(ctx.relation()); 219 | relation.setParent(searchClause); 220 | searchClause.setRelation((Relation) relation); 221 | } 222 | } 223 | 224 | depth--; 225 | return searchClause; 226 | } 227 | 228 | /* 229 | * Visits org.iptc.extra.core.eql.parsers.EqlParser.CommentClauseContext and 
produces the 230 | * equivalent org.iptc.extra.core.eql.tree.nodes.CommentClause 231 | */ 232 | @Override 233 | public Node visitCommentClause(EqlParser.CommentClauseContext ctx) { 234 | List terms = new ArrayList(); 235 | for(ParseTree child : ctx.children) { 236 | terms.add(child.getText()); 237 | } 238 | 239 | String comment = StringUtils.join(terms, " "); 240 | CommentClause commentClause = new CommentClause(comment); 241 | commentClause.setDepth(depth); 242 | 243 | return commentClause; 244 | } 245 | 246 | /* 247 | * Visits org.iptc.extra.core.eql.parsers.EqlParser.RelationContext and produces the 248 | * equivalent org.iptc.extra.core.eql.tree.nodes.Relation 249 | */ 250 | @Override 251 | public Node visitRelation(EqlParser.RelationContext ctx) { 252 | String relationName = ctx.comparitor().getText(); 253 | 254 | Relation relation = new Relation(relationName); 255 | relation.setDepth(depth); 256 | depth++; 257 | 258 | if(ctx.modifierList() != null) { 259 | List modifiers = new ArrayList(); 260 | for(ModifierContext modCtx : ctx.modifierList().modifier()) { 261 | Modifier modifier = new Modifier(modCtx.modifierName().getText()); 262 | if(modCtx.comparitorSymbol() != null) { 263 | modifier.setComparitor(modCtx.comparitorSymbol().getText()); 264 | } 265 | if(modCtx.modifierValue() != null) { 266 | modifier.setValue(modCtx.modifierValue().getText()); 267 | } 268 | modifiers.add(modifier); 269 | } 270 | 271 | if(!modifiers.isEmpty()) { 272 | relation.setModifiers(modifiers); 273 | } 274 | } 275 | 276 | depth--; 277 | 278 | boolean valid = EQLRelation.isValid(relation); 279 | relation.setValid(valid); 280 | 281 | return relation; 282 | } 283 | 284 | /* 285 | * Visits org.iptc.extra.core.eql.parsers.EqlParser.SearchTermContext and produces the 286 | * equivalent org.iptc.extra.core.eql.tree.nodes.SearchTerm 287 | */ 288 | @Override 289 | public Node visitSearchTerm(EqlParser.SearchTermContext ctx) { 290 | List terms = new ArrayList(); 291 | for(ParseTree child : 
ctx.children) { 292 | if(child.getText() != null && !child.getText().equals("\"") ) { 293 | terms.add(child.getText()); 294 | } 295 | } 296 | 297 | SearchTerm searchTerm = new SearchTerm(terms); 298 | searchTerm.setDepth(depth); 299 | 300 | return searchTerm; 301 | } 302 | 303 | /* 304 | * Visits org.iptc.extra.core.eql.parsers.EqlParser.ReferenceClauseContext and produces the 305 | * equivalent org.iptc.extra.core.eql.tree.nodes.ReferenceClause 306 | */ 307 | @Override 308 | public Node visitReferenceClause(EqlParser.ReferenceClauseContext ctx) { 309 | String ruleId = ctx.referencedRule().getText(); 310 | 311 | ReferenceClause clause = new ReferenceClause(); 312 | clause.setRuleId(ruleId); 313 | clause.setDepth(depth); 314 | 315 | return clause; 316 | } 317 | 318 | /* 319 | * Visits org.iptc.extra.core.eql.parsers.EqlParser.ErrorNode and produces the 320 | * equivalent org.iptc.extra.core.eql.tree.nodes.ErrorMessageNode 321 | */ 322 | @Override 323 | public Node visitErrorNode(ErrorNode node) { 324 | ErrorMessageNode errorMsgNode = new ErrorMessageNode(); 325 | errorMsgNode.setErrorMessage(node.getText()); 326 | errorMsgNode.setDepth(depth); 327 | 328 | return errorMsgNode; 329 | } 330 | 331 | }; 332 | 333 | Node root = visitor.visit(tree); 334 | return root; 335 | } 336 | 337 | 338 | protected static class UnderlineListener extends BaseErrorListener { 339 | public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) { 340 | System.err.println("line "+line+":"+charPositionInLine+" "+msg); 341 | underlineError(recognizer,(Token)offendingSymbol, line, charPositionInLine); 342 | } 343 | 344 | protected void underlineError(Recognizer recognizer, Token offendingToken, int line, int charPositionInLine) { 345 | 346 | CommonTokenStream tokens = (CommonTokenStream)recognizer.getInputStream(); 347 | String input = tokens.getTokenSource().getInputStream().toString(); 348 | String[] lines = 
input.split("\n"); 349 | String errorLine = lines[line - 1]; 350 | System.err.println(errorLine); 351 | for (int i=0; i=0 && stop>=0 ) { 355 | for (int i=start; i<=stop; i++) System.err.print("^"); 356 | } 357 | System.err.println(); 358 | } 359 | } 360 | 361 | protected static class SyntaxErrorsAggregator extends BaseErrorListener { 362 | 363 | private final List errors = new ArrayList(); 364 | 365 | public List getErrors() { 366 | return errors; 367 | } 368 | 369 | @Override 370 | public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) { 371 | errors.add(new SyntaxError(line, charPositionInLine, (Token) offendingSymbol)); 372 | } 373 | 374 | } 375 | 376 | public static void main(String...args) { 377 | EQLMapper mapper = new EQLMapper(); 378 | 379 | String cql = "(and " 380 | + "(title any \"this is a test\")" 381 | + "(body adj \"phrase to match\")" 382 | + "(or " 383 | + "(title any \"term2\")" 384 | + "(title any/stemming \"term3\")" 385 | + "// this is a comment //" 386 | + ")" 387 | + ")"; 388 | 389 | 390 | SyntaxTree result = EQLParser.parse(cql); 391 | System.out.println(mapper.toString(result.getRootNode(), "\n", "\t")); 392 | 393 | System.out.println(mapper.toHtml(result.getRootNode(), "div")); 394 | 395 | } 396 | 397 | } 398 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/antlr/Eql.tokens: -------------------------------------------------------------------------------- 1 | T__0=1 2 | T__1=2 3 | T__2=3 4 | T__3=4 5 | T__4=5 6 | T__5=6 7 | T__6=7 8 | T__7=8 9 | T__8=9 10 | T__9=10 11 | T__10=11 12 | T__11=12 13 | T__12=13 14 | AND=14 15 | OR=15 16 | NOT=16 17 | PROX=17 18 | CHARS=18 19 | WS=19 20 | '('=1 21 | ')'=2 22 | '//'=3 23 | '@ref'=4 24 | '=='=5 25 | '/'=6 26 | '='=7 27 | '>'=8 28 | '<'=9 29 | '>='=10 30 | '<='=11 31 | '<>'=12 32 | '"'=13 33 | 
-------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/antlr/EqlBaseListener.java: -------------------------------------------------------------------------------- 1 | // Generated from /home/manosetro/IdeaProjects/cql-parser/parser/Eql.g4 by ANTLR 4.5.3 2 | 3 | package org.iptc.extra.core.eql.antlr; 4 | 5 | 6 | import org.antlr.v4.runtime.ParserRuleContext; 7 | import org.antlr.v4.runtime.tree.ErrorNode; 8 | import org.antlr.v4.runtime.tree.TerminalNode; 9 | 10 | /** 11 | * This class provides an empty implementation of {@link EqlListener}, 12 | * which can be extended to create a listener which only needs to handle a subset 13 | * of the available methods. 14 | */ 15 | public class EqlBaseListener implements EqlListener { 16 | /** 17 | * {@inheritDoc} 18 | * 19 | *

The default implementation does nothing.

20 | */ 21 | @Override public void enterPrefixClause(EqlParser.PrefixClauseContext ctx) { } 22 | /** 23 | * {@inheritDoc} 24 | * 25 | *

The default implementation does nothing.

26 | */ 27 | @Override public void exitPrefixClause(EqlParser.PrefixClauseContext ctx) { } 28 | /** 29 | * {@inheritDoc} 30 | * 31 | *

The default implementation does nothing.

32 | */ 33 | @Override public void enterStatement(EqlParser.StatementContext ctx) { } 34 | /** 35 | * {@inheritDoc} 36 | * 37 | *

The default implementation does nothing.

38 | */ 39 | @Override public void exitStatement(EqlParser.StatementContext ctx) { } 40 | /** 41 | * {@inheritDoc} 42 | * 43 | *

The default implementation does nothing.

44 | */ 45 | @Override public void enterBooleanOp(EqlParser.BooleanOpContext ctx) { } 46 | /** 47 | * {@inheritDoc} 48 | * 49 | *

The default implementation does nothing.

50 | */ 51 | @Override public void exitBooleanOp(EqlParser.BooleanOpContext ctx) { } 52 | /** 53 | * {@inheritDoc} 54 | * 55 | *

The default implementation does nothing.

56 | */ 57 | @Override public void enterSearchClause(EqlParser.SearchClauseContext ctx) { } 58 | /** 59 | * {@inheritDoc} 60 | * 61 | *

The default implementation does nothing.

62 | */ 63 | @Override public void exitSearchClause(EqlParser.SearchClauseContext ctx) { } 64 | /** 65 | * {@inheritDoc} 66 | * 67 | *

The default implementation does nothing.

68 | */ 69 | @Override public void enterCommentClause(EqlParser.CommentClauseContext ctx) { } 70 | /** 71 | * {@inheritDoc} 72 | * 73 | *

The default implementation does nothing.

74 | */ 75 | @Override public void exitCommentClause(EqlParser.CommentClauseContext ctx) { } 76 | /** 77 | * {@inheritDoc} 78 | * 79 | *

The default implementation does nothing.

80 | */ 81 | @Override public void enterReferenceClause(EqlParser.ReferenceClauseContext ctx) { } 82 | /** 83 | * {@inheritDoc} 84 | * 85 | *

The default implementation does nothing.

86 | */ 87 | @Override public void exitReferenceClause(EqlParser.ReferenceClauseContext ctx) { } 88 | /** 89 | * {@inheritDoc} 90 | * 91 | *

The default implementation does nothing.

92 | */ 93 | @Override public void enterReferencedRule(EqlParser.ReferencedRuleContext ctx) { } 94 | /** 95 | * {@inheritDoc} 96 | * 97 | *

The default implementation does nothing.

98 | */ 99 | @Override public void exitReferencedRule(EqlParser.ReferencedRuleContext ctx) { } 100 | /** 101 | * {@inheritDoc} 102 | * 103 | *

The default implementation does nothing.

104 | */ 105 | @Override public void enterRef(EqlParser.RefContext ctx) { } 106 | /** 107 | * {@inheritDoc} 108 | * 109 | *

The default implementation does nothing.

110 | */ 111 | @Override public void exitRef(EqlParser.RefContext ctx) { } 112 | /** 113 | * {@inheritDoc} 114 | * 115 | *

The default implementation does nothing.

116 | */ 117 | @Override public void enterRelation(EqlParser.RelationContext ctx) { } 118 | /** 119 | * {@inheritDoc} 120 | * 121 | *

The default implementation does nothing.

122 | */ 123 | @Override public void exitRelation(EqlParser.RelationContext ctx) { } 124 | /** 125 | * {@inheritDoc} 126 | * 127 | *

The default implementation does nothing.

128 | */ 129 | @Override public void enterModifierList(EqlParser.ModifierListContext ctx) { } 130 | /** 131 | * {@inheritDoc} 132 | * 133 | *

The default implementation does nothing.

134 | */ 135 | @Override public void exitModifierList(EqlParser.ModifierListContext ctx) { } 136 | /** 137 | * {@inheritDoc} 138 | * 139 | *

The default implementation does nothing.

140 | */ 141 | @Override public void enterModifier(EqlParser.ModifierContext ctx) { } 142 | /** 143 | * {@inheritDoc} 144 | * 145 | *

The default implementation does nothing.

146 | */ 147 | @Override public void exitModifier(EqlParser.ModifierContext ctx) { } 148 | /** 149 | * {@inheritDoc} 150 | * 151 | *

The default implementation does nothing.

152 | */ 153 | @Override public void enterComparitor(EqlParser.ComparitorContext ctx) { } 154 | /** 155 | * {@inheritDoc} 156 | * 157 | *

The default implementation does nothing.

158 | */ 159 | @Override public void exitComparitor(EqlParser.ComparitorContext ctx) { } 160 | /** 161 | * {@inheritDoc} 162 | * 163 | *

The default implementation does nothing.

164 | */ 165 | @Override public void enterNamedComparitor(EqlParser.NamedComparitorContext ctx) { } 166 | /** 167 | * {@inheritDoc} 168 | * 169 | *

The default implementation does nothing.

170 | */ 171 | @Override public void exitNamedComparitor(EqlParser.NamedComparitorContext ctx) { } 172 | /** 173 | * {@inheritDoc} 174 | * 175 | *

The default implementation does nothing.

176 | */ 177 | @Override public void enterComparitorSymbol(EqlParser.ComparitorSymbolContext ctx) { } 178 | /** 179 | * {@inheritDoc} 180 | * 181 | *

The default implementation does nothing.

182 | */ 183 | @Override public void exitComparitorSymbol(EqlParser.ComparitorSymbolContext ctx) { } 184 | /** 185 | * {@inheritDoc} 186 | * 187 | *

The default implementation does nothing.

188 | */ 189 | @Override public void enterModifierName(EqlParser.ModifierNameContext ctx) { } 190 | /** 191 | * {@inheritDoc} 192 | * 193 | *

The default implementation does nothing.

194 | */ 195 | @Override public void exitModifierName(EqlParser.ModifierNameContext ctx) { } 196 | /** 197 | * {@inheritDoc} 198 | * 199 | *

The default implementation does nothing.

200 | */ 201 | @Override public void enterModifierValue(EqlParser.ModifierValueContext ctx) { } 202 | /** 203 | * {@inheritDoc} 204 | * 205 | *

The default implementation does nothing.

206 | */ 207 | @Override public void exitModifierValue(EqlParser.ModifierValueContext ctx) { } 208 | /** 209 | * {@inheritDoc} 210 | * 211 | *

The default implementation does nothing.

212 | */ 213 | @Override public void enterSearchTerm(EqlParser.SearchTermContext ctx) { } 214 | /** 215 | * {@inheritDoc} 216 | * 217 | *

The default implementation does nothing.

218 | */ 219 | @Override public void exitSearchTerm(EqlParser.SearchTermContext ctx) { } 220 | /** 221 | * {@inheritDoc} 222 | * 223 | *

The default implementation does nothing.

224 | */ 225 | @Override public void enterIndex(EqlParser.IndexContext ctx) { } 226 | /** 227 | * {@inheritDoc} 228 | * 229 | *

The default implementation does nothing.

230 | */ 231 | @Override public void exitIndex(EqlParser.IndexContext ctx) { } 232 | 233 | /** 234 | * {@inheritDoc} 235 | * 236 | *

The default implementation does nothing.

237 | */ 238 | @Override public void enterEveryRule(ParserRuleContext ctx) { } 239 | /** 240 | * {@inheritDoc} 241 | * 242 | *

The default implementation does nothing.

243 | */ 244 | @Override public void exitEveryRule(ParserRuleContext ctx) { } 245 | /** 246 | * {@inheritDoc} 247 | * 248 | *

The default implementation does nothing.

249 | */ 250 | @Override public void visitTerminal(TerminalNode node) { } 251 | /** 252 | * {@inheritDoc} 253 | * 254 | *

The default implementation does nothing.

255 | */ 256 | @Override public void visitErrorNode(ErrorNode node) { } 257 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/antlr/EqlBaseVisitor.java: -------------------------------------------------------------------------------- 1 | // Generated from /home/manosetro/IdeaProjects/cql-parser/parser/Eql.g4 by ANTLR 4.5.3 2 | 3 | package org.iptc.extra.core.eql.antlr; 4 | 5 | import org.antlr.v4.runtime.tree.AbstractParseTreeVisitor; 6 | 7 | /** 8 | * This class provides an empty implementation of {@link EqlVisitor}, 9 | * which can be extended to create a visitor which only needs to handle a subset 10 | * of the available methods. 11 | * 12 | * @param The return type of the visit operation. Use {@link Void} for 13 | * operations with no return type. 14 | */ 15 | public class EqlBaseVisitor extends AbstractParseTreeVisitor implements EqlVisitor { 16 | /** 17 | * {@inheritDoc} 18 | * 19 | *

The default implementation returns the result of calling 20 | * {@link #visitChildren} on {@code ctx}.

21 | */ 22 | @Override public T visitPrefixClause(EqlParser.PrefixClauseContext ctx) { return visitChildren(ctx); } 23 | /** 24 | * {@inheritDoc} 25 | * 26 | *

The default implementation returns the result of calling 27 | * {@link #visitChildren} on {@code ctx}.

28 | */ 29 | @Override public T visitStatement(EqlParser.StatementContext ctx) { return visitChildren(ctx); } 30 | /** 31 | * {@inheritDoc} 32 | * 33 | *

The default implementation returns the result of calling 34 | * {@link #visitChildren} on {@code ctx}.

35 | */ 36 | @Override public T visitBooleanOp(EqlParser.BooleanOpContext ctx) { return visitChildren(ctx); } 37 | /** 38 | * {@inheritDoc} 39 | * 40 | *

The default implementation returns the result of calling 41 | * {@link #visitChildren} on {@code ctx}.

42 | */ 43 | @Override public T visitSearchClause(EqlParser.SearchClauseContext ctx) { return visitChildren(ctx); } 44 | /** 45 | * {@inheritDoc} 46 | * 47 | *

The default implementation returns the result of calling 48 | * {@link #visitChildren} on {@code ctx}.

49 | */ 50 | @Override public T visitCommentClause(EqlParser.CommentClauseContext ctx) { return visitChildren(ctx); } 51 | /** 52 | * {@inheritDoc} 53 | * 54 | *

The default implementation returns the result of calling 55 | * {@link #visitChildren} on {@code ctx}.

56 | */ 57 | @Override public T visitReferenceClause(EqlParser.ReferenceClauseContext ctx) { return visitChildren(ctx); } 58 | /** 59 | * {@inheritDoc} 60 | * 61 | *

The default implementation returns the result of calling 62 | * {@link #visitChildren} on {@code ctx}.

63 | */ 64 | @Override public T visitReferencedRule(EqlParser.ReferencedRuleContext ctx) { return visitChildren(ctx); } 65 | /** 66 | * {@inheritDoc} 67 | * 68 | *

The default implementation returns the result of calling 69 | * {@link #visitChildren} on {@code ctx}.

70 | */ 71 | @Override public T visitRef(EqlParser.RefContext ctx) { return visitChildren(ctx); } 72 | /** 73 | * {@inheritDoc} 74 | * 75 | *

The default implementation returns the result of calling 76 | * {@link #visitChildren} on {@code ctx}.

77 | */ 78 | @Override public T visitRelation(EqlParser.RelationContext ctx) { return visitChildren(ctx); } 79 | /** 80 | * {@inheritDoc} 81 | * 82 | *

The default implementation returns the result of calling 83 | * {@link #visitChildren} on {@code ctx}.

84 | */ 85 | @Override public T visitModifierList(EqlParser.ModifierListContext ctx) { return visitChildren(ctx); } 86 | /** 87 | * {@inheritDoc} 88 | * 89 | *

The default implementation returns the result of calling 90 | * {@link #visitChildren} on {@code ctx}.

91 | */ 92 | @Override public T visitModifier(EqlParser.ModifierContext ctx) { return visitChildren(ctx); } 93 | /** 94 | * {@inheritDoc} 95 | * 96 | *

The default implementation returns the result of calling 97 | * {@link #visitChildren} on {@code ctx}.

98 | */ 99 | @Override public T visitComparitor(EqlParser.ComparitorContext ctx) { return visitChildren(ctx); } 100 | /** 101 | * {@inheritDoc} 102 | * 103 | *

The default implementation returns the result of calling 104 | * {@link #visitChildren} on {@code ctx}.

105 | */ 106 | @Override public T visitNamedComparitor(EqlParser.NamedComparitorContext ctx) { return visitChildren(ctx); } 107 | /** 108 | * {@inheritDoc} 109 | * 110 | *

The default implementation returns the result of calling 111 | * {@link #visitChildren} on {@code ctx}.

112 | */ 113 | @Override public T visitComparitorSymbol(EqlParser.ComparitorSymbolContext ctx) { return visitChildren(ctx); } 114 | /** 115 | * {@inheritDoc} 116 | * 117 | *

The default implementation returns the result of calling 118 | * {@link #visitChildren} on {@code ctx}.

119 | */ 120 | @Override public T visitModifierName(EqlParser.ModifierNameContext ctx) { return visitChildren(ctx); } 121 | /** 122 | * {@inheritDoc} 123 | * 124 | *

The default implementation returns the result of calling 125 | * {@link #visitChildren} on {@code ctx}.

126 | */ 127 | @Override public T visitModifierValue(EqlParser.ModifierValueContext ctx) { return visitChildren(ctx); } 128 | /** 129 | * {@inheritDoc} 130 | * 131 | *

The default implementation returns the result of calling 132 | * {@link #visitChildren} on {@code ctx}.

133 | */ 134 | @Override public T visitSearchTerm(EqlParser.SearchTermContext ctx) { return visitChildren(ctx); } 135 | /** 136 | * {@inheritDoc} 137 | * 138 | *

The default implementation returns the result of calling 139 | * {@link #visitChildren} on {@code ctx}.

140 | */ 141 | @Override public T visitIndex(EqlParser.IndexContext ctx) { return visitChildren(ctx); } 142 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/antlr/EqlLexer.java: -------------------------------------------------------------------------------- 1 | // Generated from /home/manosetro/IdeaProjects/cql-parser/parser/Eql.g4 by ANTLR 4.5.3 2 | 3 | package org.iptc.extra.core.eql.antlr; 4 | 5 | import org.antlr.v4.runtime.Lexer; 6 | import org.antlr.v4.runtime.CharStream; 7 | import org.antlr.v4.runtime.Token; 8 | import org.antlr.v4.runtime.TokenStream; 9 | import org.antlr.v4.runtime.*; 10 | import org.antlr.v4.runtime.atn.*; 11 | import org.antlr.v4.runtime.dfa.DFA; 12 | import org.antlr.v4.runtime.misc.*; 13 | 14 | @SuppressWarnings({"all", "warnings", "unchecked", "unused", "cast"}) 15 | public class EqlLexer extends Lexer { 16 | static { RuntimeMetaData.checkVersion("4.5.3", RuntimeMetaData.VERSION); } 17 | 18 | protected static final DFA[] _decisionToDFA; 19 | protected static final PredictionContextCache _sharedContextCache = 20 | new PredictionContextCache(); 21 | public static final int 22 | T__0=1, T__1=2, T__2=3, T__3=4, T__4=5, T__5=6, T__6=7, T__7=8, T__8=9, 23 | T__9=10, T__10=11, T__11=12, T__12=13, AND=14, OR=15, NOT=16, PROX=17, 24 | CHARS=18, WS=19; 25 | public static String[] modeNames = { 26 | "DEFAULT_MODE" 27 | }; 28 | 29 | public static final String[] ruleNames = { 30 | "T__0", "T__1", "T__2", "T__3", "T__4", "T__5", "T__6", "T__7", "T__8", 31 | "T__9", "T__10", "T__11", "T__12", "AND", "OR", "NOT", "PROX", "CHARS", 32 | "WS" 33 | }; 34 | 35 | private static final String[] _LITERAL_NAMES = { 36 | null, "'('", "')'", "'//'", "'@ref'", "'=='", "'/'", "'='", "'>'", "'<'", 37 | "'>='", "'<='", "'<>'", "'\"'" 38 | }; 39 | private static final String[] _SYMBOLIC_NAMES = { 40 | null, null, null, null, null, null, null, null, null, null, null, null, 41 | null, null, 
"AND", "OR", "NOT", "PROX", "CHARS", "WS" 42 | }; 43 | public static final Vocabulary VOCABULARY = new VocabularyImpl(_LITERAL_NAMES, _SYMBOLIC_NAMES); 44 | 45 | /** 46 | * @deprecated Use {@link #VOCABULARY} instead. 47 | */ 48 | @Deprecated 49 | public static final String[] tokenNames; 50 | static { 51 | tokenNames = new String[_SYMBOLIC_NAMES.length]; 52 | for (int i = 0; i < tokenNames.length; i++) { 53 | tokenNames[i] = VOCABULARY.getLiteralName(i); 54 | if (tokenNames[i] == null) { 55 | tokenNames[i] = VOCABULARY.getSymbolicName(i); 56 | } 57 | 58 | if (tokenNames[i] == null) { 59 | tokenNames[i] = ""; 60 | } 61 | } 62 | } 63 | 64 | @Override 65 | @Deprecated 66 | public String[] getTokenNames() { 67 | return tokenNames; 68 | } 69 | 70 | @Override 71 | 72 | public Vocabulary getVocabulary() { 73 | return VOCABULARY; 74 | } 75 | 76 | 77 | public EqlLexer(CharStream input) { 78 | super(input); 79 | _interp = new LexerATNSimulator(this,_ATN,_decisionToDFA,_sharedContextCache); 80 | } 81 | 82 | @Override 83 | public String getGrammarFileName() { return "Eql.g4"; } 84 | 85 | @Override 86 | public String[] getRuleNames() { return ruleNames; } 87 | 88 | @Override 89 | public String getSerializedATN() { return _serializedATN; } 90 | 91 | @Override 92 | public String[] getModeNames() { return modeNames; } 93 | 94 | @Override 95 | public ATN getATN() { return _ATN; } 96 | 97 | public static final String _serializedATN = 98 | "\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2\25w\b\1\4\2\t\2\4"+ 99 | "\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7\4\b\t\b\4\t\t\t\4\n\t\n\4\13\t"+ 100 | "\13\4\f\t\f\4\r\t\r\4\16\t\16\4\17\t\17\4\20\t\20\4\21\t\21\4\22\t\22"+ 101 | "\4\23\t\23\4\24\t\24\3\2\3\2\3\3\3\3\3\4\3\4\3\4\3\5\3\5\3\5\3\5\3\5\3"+ 102 | "\6\3\6\3\6\3\7\3\7\3\b\3\b\3\t\3\t\3\n\3\n\3\13\3\13\3\13\3\f\3\f\3\f"+ 103 | "\3\r\3\r\3\r\3\16\3\16\3\17\3\17\3\17\3\17\3\17\3\17\5\17R\n\17\3\20\3"+ 104 | "\20\3\20\3\20\5\20X\n\20\3\21\3\21\3\21\3\21\3\21\3\21\5\21`\n\21\3\22"+ 105 
| "\3\22\3\22\3\22\3\22\3\22\3\22\3\22\5\22j\n\22\3\23\6\23m\n\23\r\23\16"+ 106 | "\23n\3\24\6\24r\n\24\r\24\16\24s\3\24\3\24\2\2\25\3\3\5\4\7\5\t\6\13\7"+ 107 | "\r\b\17\t\21\n\23\13\25\f\27\r\31\16\33\17\35\20\37\21!\22#\23%\24\'\25"+ 108 | "\3\2\4\b\2\f\f\"\"$$*+\61\61>@\5\2\13\f\17\17\"\"|\2\3\3\2\2\2\2\5\3\2"+ 109 | "\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\2\21"+ 110 | "\3\2\2\2\2\23\3\2\2\2\2\25\3\2\2\2\2\27\3\2\2\2\2\31\3\2\2\2\2\33\3\2"+ 111 | "\2\2\2\35\3\2\2\2\2\37\3\2\2\2\2!\3\2\2\2\2#\3\2\2\2\2%\3\2\2\2\2\'\3"+ 112 | "\2\2\2\3)\3\2\2\2\5+\3\2\2\2\7-\3\2\2\2\t\60\3\2\2\2\13\65\3\2\2\2\r8"+ 113 | "\3\2\2\2\17:\3\2\2\2\21<\3\2\2\2\23>\3\2\2\2\25@\3\2\2\2\27C\3\2\2\2\31"+ 114 | "F\3\2\2\2\33I\3\2\2\2\35Q\3\2\2\2\37W\3\2\2\2!_\3\2\2\2#i\3\2\2\2%l\3"+ 115 | "\2\2\2\'q\3\2\2\2)*\7*\2\2*\4\3\2\2\2+,\7+\2\2,\6\3\2\2\2-.\7\61\2\2."+ 116 | "/\7\61\2\2/\b\3\2\2\2\60\61\7B\2\2\61\62\7t\2\2\62\63\7g\2\2\63\64\7h"+ 117 | "\2\2\64\n\3\2\2\2\65\66\7?\2\2\66\67\7?\2\2\67\f\3\2\2\289\7\61\2\29\16"+ 118 | "\3\2\2\2:;\7?\2\2;\20\3\2\2\2<=\7@\2\2=\22\3\2\2\2>?\7>\2\2?\24\3\2\2"+ 119 | "\2@A\7@\2\2AB\7?\2\2B\26\3\2\2\2CD\7>\2\2DE\7?\2\2E\30\3\2\2\2FG\7>\2"+ 120 | "\2GH\7@\2\2H\32\3\2\2\2IJ\7$\2\2J\34\3\2\2\2KL\7C\2\2LM\7P\2\2MR\7F\2"+ 121 | "\2NO\7c\2\2OP\7p\2\2PR\7f\2\2QK\3\2\2\2QN\3\2\2\2R\36\3\2\2\2ST\7Q\2\2"+ 122 | "TX\7T\2\2UV\7q\2\2VX\7t\2\2WS\3\2\2\2WU\3\2\2\2X \3\2\2\2YZ\7P\2\2Z[\7"+ 123 | "Q\2\2[`\7V\2\2\\]\7p\2\2]^\7q\2\2^`\7v\2\2_Y\3\2\2\2_\\\3\2\2\2`\"\3\2"+ 124 | "\2\2ab\7R\2\2bc\7T\2\2cd\7Q\2\2dj\7Z\2\2ef\7r\2\2fg\7t\2\2gh\7q\2\2hj"+ 125 | "\7z\2\2ia\3\2\2\2ie\3\2\2\2j$\3\2\2\2km\n\2\2\2lk\3\2\2\2mn\3\2\2\2nl"+ 126 | "\3\2\2\2no\3\2\2\2o&\3\2\2\2pr\t\3\2\2qp\3\2\2\2rs\3\2\2\2sq\3\2\2\2s"+ 127 | "t\3\2\2\2tu\3\2\2\2uv\b\24\2\2v(\3\2\2\2\t\2QW_ins\3\2\3\2"; 128 | public static final ATN _ATN = 129 | new ATNDeserializer().deserialize(_serializedATN.toCharArray()); 130 | static { 131 | _decisionToDFA = new DFA[_ATN.getNumberOfDecisions()]; 
132 | for (int i = 0; i < _ATN.getNumberOfDecisions(); i++) { 133 | _decisionToDFA[i] = new DFA(_ATN.getDecisionState(i), i); 134 | } 135 | } 136 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/antlr/EqlLexer.tokens: -------------------------------------------------------------------------------- 1 | T__0=1 2 | T__1=2 3 | T__2=3 4 | T__3=4 5 | T__4=5 6 | T__5=6 7 | T__6=7 8 | T__7=8 9 | T__8=9 10 | T__9=10 11 | T__10=11 12 | T__11=12 13 | T__12=13 14 | AND=14 15 | OR=15 16 | NOT=16 17 | PROX=17 18 | CHARS=18 19 | WS=19 20 | '('=1 21 | ')'=2 22 | '//'=3 23 | '@ref'=4 24 | '=='=5 25 | '/'=6 26 | '='=7 27 | '>'=8 28 | '<'=9 29 | '>='=10 30 | '<='=11 31 | '<>'=12 32 | '"'=13 33 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/antlr/EqlListener.java: -------------------------------------------------------------------------------- 1 | // Generated from /home/manosetro/IdeaProjects/cql-parser/parser/Eql.g4 by ANTLR 4.5.3 2 | 3 | package org.iptc.extra.core.eql.antlr; 4 | 5 | import org.antlr.v4.runtime.tree.ParseTreeListener; 6 | 7 | /** 8 | * This interface defines a complete listener for a parse tree produced by 9 | * {@link EqlParser}. 10 | */ 11 | public interface EqlListener extends ParseTreeListener { 12 | /** 13 | * Enter a parse tree produced by {@link EqlParser#prefixClause}. 14 | * @param ctx the parse tree 15 | */ 16 | void enterPrefixClause(EqlParser.PrefixClauseContext ctx); 17 | /** 18 | * Exit a parse tree produced by {@link EqlParser#prefixClause}. 19 | * @param ctx the parse tree 20 | */ 21 | void exitPrefixClause(EqlParser.PrefixClauseContext ctx); 22 | /** 23 | * Enter a parse tree produced by {@link EqlParser#statement}. 24 | * @param ctx the parse tree 25 | */ 26 | void enterStatement(EqlParser.StatementContext ctx); 27 | /** 28 | * Exit a parse tree produced by {@link EqlParser#statement}. 
29 | * @param ctx the parse tree 30 | */ 31 | void exitStatement(EqlParser.StatementContext ctx); 32 | /** 33 | * Enter a parse tree produced by {@link EqlParser#booleanOp}. 34 | * @param ctx the parse tree 35 | */ 36 | void enterBooleanOp(EqlParser.BooleanOpContext ctx); 37 | /** 38 | * Exit a parse tree produced by {@link EqlParser#booleanOp}. 39 | * @param ctx the parse tree 40 | */ 41 | void exitBooleanOp(EqlParser.BooleanOpContext ctx); 42 | /** 43 | * Enter a parse tree produced by {@link EqlParser#searchClause}. 44 | * @param ctx the parse tree 45 | */ 46 | void enterSearchClause(EqlParser.SearchClauseContext ctx); 47 | /** 48 | * Exit a parse tree produced by {@link EqlParser#searchClause}. 49 | * @param ctx the parse tree 50 | */ 51 | void exitSearchClause(EqlParser.SearchClauseContext ctx); 52 | /** 53 | * Enter a parse tree produced by {@link EqlParser#commentClause}. 54 | * @param ctx the parse tree 55 | */ 56 | void enterCommentClause(EqlParser.CommentClauseContext ctx); 57 | /** 58 | * Exit a parse tree produced by {@link EqlParser#commentClause}. 59 | * @param ctx the parse tree 60 | */ 61 | void exitCommentClause(EqlParser.CommentClauseContext ctx); 62 | /** 63 | * Enter a parse tree produced by {@link EqlParser#referenceClause}. 64 | * @param ctx the parse tree 65 | */ 66 | void enterReferenceClause(EqlParser.ReferenceClauseContext ctx); 67 | /** 68 | * Exit a parse tree produced by {@link EqlParser#referenceClause}. 69 | * @param ctx the parse tree 70 | */ 71 | void exitReferenceClause(EqlParser.ReferenceClauseContext ctx); 72 | /** 73 | * Enter a parse tree produced by {@link EqlParser#referencedRule}. 74 | * @param ctx the parse tree 75 | */ 76 | void enterReferencedRule(EqlParser.ReferencedRuleContext ctx); 77 | /** 78 | * Exit a parse tree produced by {@link EqlParser#referencedRule}. 
79 | * @param ctx the parse tree 80 | */ 81 | void exitReferencedRule(EqlParser.ReferencedRuleContext ctx); 82 | /** 83 | * Enter a parse tree produced by {@link EqlParser#ref}. 84 | * @param ctx the parse tree 85 | */ 86 | void enterRef(EqlParser.RefContext ctx); 87 | /** 88 | * Exit a parse tree produced by {@link EqlParser#ref}. 89 | * @param ctx the parse tree 90 | */ 91 | void exitRef(EqlParser.RefContext ctx); 92 | /** 93 | * Enter a parse tree produced by {@link EqlParser#relation}. 94 | * @param ctx the parse tree 95 | */ 96 | void enterRelation(EqlParser.RelationContext ctx); 97 | /** 98 | * Exit a parse tree produced by {@link EqlParser#relation}. 99 | * @param ctx the parse tree 100 | */ 101 | void exitRelation(EqlParser.RelationContext ctx); 102 | /** 103 | * Enter a parse tree produced by {@link EqlParser#modifierList}. 104 | * @param ctx the parse tree 105 | */ 106 | void enterModifierList(EqlParser.ModifierListContext ctx); 107 | /** 108 | * Exit a parse tree produced by {@link EqlParser#modifierList}. 109 | * @param ctx the parse tree 110 | */ 111 | void exitModifierList(EqlParser.ModifierListContext ctx); 112 | /** 113 | * Enter a parse tree produced by {@link EqlParser#modifier}. 114 | * @param ctx the parse tree 115 | */ 116 | void enterModifier(EqlParser.ModifierContext ctx); 117 | /** 118 | * Exit a parse tree produced by {@link EqlParser#modifier}. 119 | * @param ctx the parse tree 120 | */ 121 | void exitModifier(EqlParser.ModifierContext ctx); 122 | /** 123 | * Enter a parse tree produced by {@link EqlParser#comparitor}. 124 | * @param ctx the parse tree 125 | */ 126 | void enterComparitor(EqlParser.ComparitorContext ctx); 127 | /** 128 | * Exit a parse tree produced by {@link EqlParser#comparitor}. 129 | * @param ctx the parse tree 130 | */ 131 | void exitComparitor(EqlParser.ComparitorContext ctx); 132 | /** 133 | * Enter a parse tree produced by {@link EqlParser#namedComparitor}. 
134 | * @param ctx the parse tree 135 | */ 136 | void enterNamedComparitor(EqlParser.NamedComparitorContext ctx); 137 | /** 138 | * Exit a parse tree produced by {@link EqlParser#namedComparitor}. 139 | * @param ctx the parse tree 140 | */ 141 | void exitNamedComparitor(EqlParser.NamedComparitorContext ctx); 142 | /** 143 | * Enter a parse tree produced by {@link EqlParser#comparitorSymbol}. 144 | * @param ctx the parse tree 145 | */ 146 | void enterComparitorSymbol(EqlParser.ComparitorSymbolContext ctx); 147 | /** 148 | * Exit a parse tree produced by {@link EqlParser#comparitorSymbol}. 149 | * @param ctx the parse tree 150 | */ 151 | void exitComparitorSymbol(EqlParser.ComparitorSymbolContext ctx); 152 | /** 153 | * Enter a parse tree produced by {@link EqlParser#modifierName}. 154 | * @param ctx the parse tree 155 | */ 156 | void enterModifierName(EqlParser.ModifierNameContext ctx); 157 | /** 158 | * Exit a parse tree produced by {@link EqlParser#modifierName}. 159 | * @param ctx the parse tree 160 | */ 161 | void exitModifierName(EqlParser.ModifierNameContext ctx); 162 | /** 163 | * Enter a parse tree produced by {@link EqlParser#modifierValue}. 164 | * @param ctx the parse tree 165 | */ 166 | void enterModifierValue(EqlParser.ModifierValueContext ctx); 167 | /** 168 | * Exit a parse tree produced by {@link EqlParser#modifierValue}. 169 | * @param ctx the parse tree 170 | */ 171 | void exitModifierValue(EqlParser.ModifierValueContext ctx); 172 | /** 173 | * Enter a parse tree produced by {@link EqlParser#searchTerm}. 174 | * @param ctx the parse tree 175 | */ 176 | void enterSearchTerm(EqlParser.SearchTermContext ctx); 177 | /** 178 | * Exit a parse tree produced by {@link EqlParser#searchTerm}. 179 | * @param ctx the parse tree 180 | */ 181 | void exitSearchTerm(EqlParser.SearchTermContext ctx); 182 | /** 183 | * Enter a parse tree produced by {@link EqlParser#index}. 
184 | * @param ctx the parse tree 185 | */ 186 | void enterIndex(EqlParser.IndexContext ctx); 187 | /** 188 | * Exit a parse tree produced by {@link EqlParser#index}. 189 | * @param ctx the parse tree 190 | */ 191 | void exitIndex(EqlParser.IndexContext ctx); 192 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/antlr/EqlVisitor.java: -------------------------------------------------------------------------------- 1 | // Generated from /home/manosetro/IdeaProjects/cql-parser/parser/Eql.g4 by ANTLR 4.5.3 2 | 3 | package org.iptc.extra.core.eql.antlr; 4 | 5 | import org.antlr.v4.runtime.tree.ParseTreeVisitor; 6 | 7 | /** 8 | * This interface defines a complete generic visitor for a parse tree produced 9 | * by {@link EqlParser}. 10 | * 11 | * @param The return type of the visit operation. Use {@link Void} for 12 | * operations with no return type. 13 | */ 14 | public interface EqlVisitor extends ParseTreeVisitor { 15 | /** 16 | * Visit a parse tree produced by {@link EqlParser#prefixClause}. 17 | * @param ctx the parse tree 18 | * @return the visitor result 19 | */ 20 | T visitPrefixClause(EqlParser.PrefixClauseContext ctx); 21 | /** 22 | * Visit a parse tree produced by {@link EqlParser#statement}. 23 | * @param ctx the parse tree 24 | * @return the visitor result 25 | */ 26 | T visitStatement(EqlParser.StatementContext ctx); 27 | /** 28 | * Visit a parse tree produced by {@link EqlParser#booleanOp}. 29 | * @param ctx the parse tree 30 | * @return the visitor result 31 | */ 32 | T visitBooleanOp(EqlParser.BooleanOpContext ctx); 33 | /** 34 | * Visit a parse tree produced by {@link EqlParser#searchClause}. 35 | * @param ctx the parse tree 36 | * @return the visitor result 37 | */ 38 | T visitSearchClause(EqlParser.SearchClauseContext ctx); 39 | /** 40 | * Visit a parse tree produced by {@link EqlParser#commentClause}. 
41 | * @param ctx the parse tree 42 | * @return the visitor result 43 | */ 44 | T visitCommentClause(EqlParser.CommentClauseContext ctx); 45 | /** 46 | * Visit a parse tree produced by {@link EqlParser#referenceClause}. 47 | * @param ctx the parse tree 48 | * @return the visitor result 49 | */ 50 | T visitReferenceClause(EqlParser.ReferenceClauseContext ctx); 51 | /** 52 | * Visit a parse tree produced by {@link EqlParser#referencedRule}. 53 | * @param ctx the parse tree 54 | * @return the visitor result 55 | */ 56 | T visitReferencedRule(EqlParser.ReferencedRuleContext ctx); 57 | /** 58 | * Visit a parse tree produced by {@link EqlParser#ref}. 59 | * @param ctx the parse tree 60 | * @return the visitor result 61 | */ 62 | T visitRef(EqlParser.RefContext ctx); 63 | /** 64 | * Visit a parse tree produced by {@link EqlParser#relation}. 65 | * @param ctx the parse tree 66 | * @return the visitor result 67 | */ 68 | T visitRelation(EqlParser.RelationContext ctx); 69 | /** 70 | * Visit a parse tree produced by {@link EqlParser#modifierList}. 71 | * @param ctx the parse tree 72 | * @return the visitor result 73 | */ 74 | T visitModifierList(EqlParser.ModifierListContext ctx); 75 | /** 76 | * Visit a parse tree produced by {@link EqlParser#modifier}. 77 | * @param ctx the parse tree 78 | * @return the visitor result 79 | */ 80 | T visitModifier(EqlParser.ModifierContext ctx); 81 | /** 82 | * Visit a parse tree produced by {@link EqlParser#comparitor}. 83 | * @param ctx the parse tree 84 | * @return the visitor result 85 | */ 86 | T visitComparitor(EqlParser.ComparitorContext ctx); 87 | /** 88 | * Visit a parse tree produced by {@link EqlParser#namedComparitor}. 89 | * @param ctx the parse tree 90 | * @return the visitor result 91 | */ 92 | T visitNamedComparitor(EqlParser.NamedComparitorContext ctx); 93 | /** 94 | * Visit a parse tree produced by {@link EqlParser#comparitorSymbol}. 
95 | * @param ctx the parse tree 96 | * @return the visitor result 97 | */ 98 | T visitComparitorSymbol(EqlParser.ComparitorSymbolContext ctx); 99 | /** 100 | * Visit a parse tree produced by {@link EqlParser#modifierName}. 101 | * @param ctx the parse tree 102 | * @return the visitor result 103 | */ 104 | T visitModifierName(EqlParser.ModifierNameContext ctx); 105 | /** 106 | * Visit a parse tree produced by {@link EqlParser#modifierValue}. 107 | * @param ctx the parse tree 108 | * @return the visitor result 109 | */ 110 | T visitModifierValue(EqlParser.ModifierValueContext ctx); 111 | /** 112 | * Visit a parse tree produced by {@link EqlParser#searchTerm}. 113 | * @param ctx the parse tree 114 | * @return the visitor result 115 | */ 116 | T visitSearchTerm(EqlParser.SearchTermContext ctx); 117 | /** 118 | * Visit a parse tree produced by {@link EqlParser#index}. 119 | * @param ctx the parse tree 120 | * @return the visitor result 121 | */ 122 | T visitIndex(EqlParser.IndexContext ctx); 123 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/SyntaxError.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree; 2 | 3 | import org.antlr.v4.runtime.Token; 4 | 5 | /** 6 | * @author manosetro 7 | * 8 | */ 9 | public class SyntaxError { 10 | 11 | private int line; 12 | private int position; 13 | private String offendingToken; 14 | private int[] offendingTokenPosition = new int[2]; 15 | 16 | public SyntaxError(int line, int position, Token offendingToken) { 17 | this.line = line; 18 | this.position = position; 19 | this.offendingToken = offendingToken.getText(); 20 | this.offendingTokenPosition[0] = offendingToken.getStartIndex(); 21 | this.offendingTokenPosition[1] = offendingToken.getStopIndex(); 22 | } 23 | 24 | public int getLine() { 25 | return line; 26 | } 27 | 28 | public void setLine(int line) { 29 | this.line = line; 
30 | } 31 | 32 | public int getPosition() { 33 | return position; 34 | } 35 | 36 | public void setPosition(int position) { 37 | this.position = position; 38 | } 39 | 40 | public String getOffendingToken() { 41 | return offendingToken; 42 | } 43 | 44 | public void setOffendingToken(String offendingToken) { 45 | this.offendingToken = offendingToken; 46 | } 47 | 48 | public int[] getOffendingTokenPosition() { 49 | return offendingTokenPosition; 50 | } 51 | 52 | public void setOffendingTokenPosition(int[] offendingTokenPosition) { 53 | this.offendingTokenPosition = offendingTokenPosition; 54 | } 55 | 56 | public String toString() { 57 | return "Syntax Error at line " + line + ":" + position + ". Offending Token: " + offendingToken + " [" + offendingTokenPosition[0] + ":" + offendingTokenPosition[1] + "]"; 58 | } 59 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/SyntaxTree.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.iptc.extra.core.eql.tree.nodes.Node; 7 | 8 | /** 9 | * @author manosetro 10 | * 11 | * SyntaxTree class is the result of parsing of a given EQL rule. 12 | * 13 | */ 14 | public class SyntaxTree { 15 | 16 | private Node root; // the root node of the syntax tree. Is the entry node for each class used to traverse the tree. 
17 | 18 | private List errors = new ArrayList(); // a list of syntax errors produced by Antlr during parsing 19 | 20 | public List getErrors() { 21 | return errors; 22 | } 23 | 24 | public void setErrors(List errors) { 25 | this.errors = errors; 26 | } 27 | 28 | public boolean hasErrors() { 29 | return errors.size() > 0; 30 | } 31 | 32 | public Node getRootNode() { 33 | return root; 34 | } 35 | 36 | public void setRootNode(Node root) { 37 | this.root = root; 38 | } 39 | 40 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/extra/EQLOperator.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.extra; 2 | 3 | import java.util.List; 4 | 5 | import org.iptc.extra.core.eql.tree.nodes.Clause; 6 | import org.iptc.extra.core.eql.tree.nodes.Modifier; 7 | import org.iptc.extra.core.eql.tree.nodes.Operator; 8 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 9 | 10 | /** 11 | * 12 | * @author manos schinas 13 | * 14 | * A set of valid operators in EQL, built upon modified or, and, not, prox 15 | * 16 | */ 17 | public enum EQLOperator { 18 | AND, // and 19 | OR, // or 20 | NOT, // not 21 | MINIMUM, // or/countunique>n 22 | DISTANCE, // prox/unit=word/distancen 24 | MINIMUM_OCCURRENCE, // or/count>n 25 | MAXIMUM_OCCURRENCE, // or/count>n 26 | ORDER, // prox/ordered 27 | SENTENCE, // prox/unit=sentence/distance=1 28 | PARAGRAPH, // prox/unit=paragraph/distance=1 29 | NOT_IN_PHRASE, // prox/unit=word/distance>0 30 | NOT_IN_SENTENCE, // prox/unit=sentence/distance>1 31 | NOT_IN_PARAGRAPH, // prox/unit=paragraph/distance>1 32 | ORDER_AND_DISTANCE, // prox/unit=word/distance modifiers = operator.getModifiers(); 42 | String op = operator.getOperator(); 43 | if(op == null) { 44 | return null; 45 | } 46 | 47 | if(op.equals("and")) { 48 | if(modifiers == null || modifiers.isEmpty()) { 49 | return AND; 50 | } 51 | } 52 | else if 
(op.equals("or")) { 53 | if(modifiers == null || modifiers.isEmpty()) { 54 | return OR; 55 | } 56 | else { 57 | if(operator.hasModifier("count")) { 58 | Modifier modifier = operator.getModifier("count"); 59 | if(modifier.isComparitorGT() || modifier.isComparitorGTE()) { 60 | return MINIMUM_OCCURRENCE; 61 | } 62 | 63 | if(modifier.isComparitorLT() || modifier.isComparitorLTE()) { 64 | return MAXIMUM_OCCURRENCE; 65 | } 66 | } 67 | if(operator.hasModifier("countunique")) { 68 | Modifier modifier = operator.getModifier("countunique"); 69 | if(modifier.isComparitorGT() || modifier.isComparitorGTE()) { 70 | return MINIMUM; 71 | } 72 | } 73 | } 74 | } 75 | else if(op.equals("not")) { 76 | return NOT; 77 | } 78 | else if(op.equals("prox")) { 79 | if(modifiers != null && !modifiers.isEmpty()) { 80 | if(operator.hasModifier("distance") && operator.hasModifier("unit")) { 81 | Modifier distanceModifier = operator.getModifier("distance"); 82 | Modifier unitModifier = operator.getModifier("unit"); 83 | 84 | // word-level distances 85 | if(unitModifier.isComparitorEQ() && unitModifier.valueEquals("word")) { 86 | if(distanceModifier.isComparitorLTE() || distanceModifier.isComparitorLT()) { 87 | if(operator.hasModifier("ordered")) { 88 | return ORDER_AND_DISTANCE; 89 | } 90 | return DISTANCE; 91 | } 92 | 93 | if(distanceModifier.isComparitorGT()) { 94 | if(distanceModifier.valueEquals("0")) { 95 | return NOT_IN_PHRASE; 96 | } 97 | 98 | return NOT_WITHIN_DISTANCE; 99 | } 100 | } 101 | 102 | // sentence-level distances 103 | if(unitModifier.isComparitorEQ() && unitModifier.valueEquals("sentence")) { 104 | if(distanceModifier.isComparitorEQ() && distanceModifier.valueEquals("1")) { 105 | return SENTENCE; 106 | } 107 | if(distanceModifier.isComparitorGT()) { 108 | return NOT_IN_SENTENCE; 109 | } 110 | } 111 | 112 | // paragraph-level distances 113 | if(unitModifier.isComparitorEQ() && unitModifier.valueEquals("paragraph")) { 114 | if(distanceModifier.isComparitorEQ() && 
distanceModifier.valueEquals("1")) { 115 | return PARAGRAPH; 116 | } 117 | if(distanceModifier.isComparitorGT()) { 118 | return NOT_IN_PARAGRAPH; 119 | } 120 | } 121 | 122 | } 123 | if(operator.hasModifier("ordered")) { 124 | return ORDER; 125 | } 126 | 127 | } 128 | } 129 | 130 | return null; 131 | } 132 | 133 | public static boolean isValid(Operator operator) { 134 | EQLOperator extraOperator = getEQLOperator(operator); 135 | return extraOperator != null; 136 | } 137 | 138 | public static boolean isEQLOperatorClause(Clause clause, EQLOperator extraOperator) { 139 | if(clause instanceof PrefixClause) { 140 | PrefixClause prefixClause = (PrefixClause) clause; 141 | if(prefixClause.getEQLOperator() != null && prefixClause.getEQLOperator() == extraOperator) { 142 | return true; 143 | } 144 | } 145 | return false; 146 | } 147 | 148 | public static boolean isWordDistanceOperator(EQLOperator extraOperator) { 149 | return (extraOperator == EQLOperator.DISTANCE || extraOperator == EQLOperator.NOT_WITHIN_DISTANCE || 150 | extraOperator == EQLOperator.ORDER || extraOperator == EQLOperator.ORDER_AND_DISTANCE || 151 | extraOperator == EQLOperator.NOT_IN_PHRASE); 152 | } 153 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/extra/EQLRelation.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.extra; 2 | 3 | import org.iptc.extra.core.eql.tree.nodes.Relation; 4 | 5 | public enum EQLRelation { 6 | 7 | CONTAIN ("="), 8 | EXACT ("=="), 9 | NOT_EQUAL ("<>"), 10 | LT ("<"), 11 | LTE ("<="), 12 | GT (">"), 13 | GTE (">="), 14 | ADJ ("adj"), 15 | ALL ("all"), 16 | ANY ("any"), 17 | WITHIN ("within"); 18 | 19 | private final String relation; 20 | 21 | EQLRelation(String relation) { 22 | this.relation = relation; 23 | }; 24 | 25 | private String relation() { return relation; } 26 | 27 | public static boolean isValid(Relation relation) 
{ 28 | String r = relation.getRelation(); 29 | if(r == null) { 30 | return false; 31 | } 32 | 33 | r = r.toLowerCase(); 34 | 35 | if(r.equals(CONTAIN.relation())) { 36 | return true; 37 | } 38 | if(r.equals(EXACT.relation())) { 39 | return true; 40 | } 41 | if(r.equals(NOT_EQUAL.relation())) { 42 | return true; 43 | } 44 | if(r.equals(LT.relation()) || r.equals(LTE.relation())) { 45 | return true; 46 | } 47 | if(r.equals(GT.relation()) || r.equals(GTE.relation())) { 48 | return true; 49 | } 50 | if(r.equals(ADJ.relation())) { 51 | return true; 52 | } 53 | if(r.equals(ALL.relation())) { 54 | return true; 55 | } 56 | if(r.equals(ANY.relation())) { 57 | return true; 58 | } 59 | if(r.equals(WITHIN.relation())) { 60 | return true; 61 | } 62 | 63 | return false; 64 | } 65 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/nodes/Clause.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.nodes; 2 | 3 | /** 4 | * 5 | * @author manos schinas 6 | * 7 | * Clause class corresponds to a statement in an EQL query 8 | * 9 | * Can be one of the following: 10 | * prefix clause (org.iptc.extra.core.eql.tree.nodes.PrefixClause) 11 | * search clause (org.iptc.extra.core.eql.tree.nodes.SearchClause) 12 | * comment clause (org.iptc.extra.core.eql.tree.nodes.CommentClause) 13 | * reference clause (org.iptc.extra.core.eql.tree.nodes.ReferenceClause) 14 | * 15 | */ 16 | public class Clause extends Node { 17 | 18 | @Override 19 | public boolean hasChildren() { 20 | return true; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/nodes/CommentClause.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.nodes; 2 | 3 | /** 4 | * @author manosetro - Manos Schinas 5 | * 6 | * 
CommentClause corresponds to in-line comments in the rule. 7 | * Every string between //...// is considered as a comment and mapped to a CommentClause object in the syntax tree. 8 | * 9 | * (or 10 | * (title any "term1 term2 term2") 11 | * //this is a comment that will be mapped to a ReferenceClause// 12 | * ) 13 | * 14 | */ 15 | public class CommentClause extends Clause { 16 | 17 | protected String comment; 18 | 19 | public CommentClause() { 20 | 21 | } 22 | 23 | public CommentClause(String comment) { 24 | this.comment = comment; 25 | } 26 | 27 | public String getComment() { 28 | return comment; 29 | } 30 | 31 | public void setComment(String comment) { 32 | this.comment = comment; 33 | } 34 | 35 | @Override 36 | public boolean hasChildren() { 37 | return false; 38 | } 39 | 40 | @Override 41 | public String toString() { 42 | return comment; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/nodes/ErrorMessageNode.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.nodes; 2 | 3 | /** 4 | * 5 | * @author manos schinas 6 | * 7 | * A node that corresponds to a syntax error produced during eql parsing by antlr 8 | * 9 | */ 10 | public class ErrorMessageNode extends Node { 11 | 12 | private String errorMessage; 13 | 14 | @Override 15 | public boolean hasChildren() { 16 | return false; 17 | } 18 | 19 | public String getErrorMessage() { 20 | return errorMessage; 21 | } 22 | 23 | public void setErrorMessage(String errorMessage) { 24 | this.errorMessage = errorMessage; 25 | } 26 | 27 | public String toString() { 28 | return errorMessage; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/nodes/Index.java: -------------------------------------------------------------------------------- 1 | package 
org.iptc.extra.core.eql.tree.nodes; 2 | 3 | /** 4 | * 5 | * @author manos schinas 6 | * 7 | * Part of search clauses: 8 | * 9 | * Index Relation SearchTerm 10 | * | | | 11 | * title any "term1 term2" 12 | * 13 | */ 14 | public class Index extends Node { 15 | 16 | private String name; 17 | 18 | public Index(String name) { 19 | super(); 20 | this.name = name; 21 | } 22 | 23 | public String getName() { 24 | return name; 25 | } 26 | 27 | public void setName(String name) { 28 | this.name = name; 29 | } 30 | 31 | @Override 32 | public boolean hasChildren() { 33 | return false; 34 | } 35 | 36 | @Override 37 | public String toString() { 38 | return name; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/nodes/Modified.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.nodes; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | /** 9 | * 10 | * @author manos schinas 11 | * 12 | * Modified class represents nodes in the syntax tree that can be modified. 
13 | * There are two objects that can be modified: operators (org.iptc.extra.core.eql.tree.nodes.Operator) 14 | * and relations (org.iptc.extra.core.eql.tree.nodes.Relation) 15 | * 16 | * 17 | * Example: prox/unit=word/distance>2 18 | * Operator prox is modified by two modifiers unit=word and distance>2 19 | * These two modifiers are kept into modifiersMap 20 | * 21 | */ 22 | public class Modified extends Node { 23 | 24 | protected Map modifiersMap = new HashMap(); 25 | 26 | public List getModifiers() { 27 | return new ArrayList(modifiersMap.values()); 28 | } 29 | 30 | public void setModifiers(List modifiers) { 31 | for(Modifier modifier : modifiers) { 32 | modifiersMap.put(modifier.getModifier(), modifier); 33 | } 34 | } 35 | 36 | public boolean hasModifier(String modifier) { 37 | return modifiersMap.containsKey(modifier); 38 | } 39 | 40 | public Modifier getModifier(String modifier) { 41 | return modifiersMap.get(modifier); 42 | } 43 | 44 | @Override 45 | public boolean hasChildren() { 46 | return false; 47 | } 48 | 49 | public boolean isModified() { 50 | return !modifiersMap.isEmpty(); 51 | } 52 | 53 | @Override 54 | public String toString() { 55 | StringBuffer buffer = new StringBuffer(); 56 | for(Modifier modifier : modifiersMap.values()) { 57 | buffer.append(modifier); 58 | } 59 | 60 | return buffer.toString(); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/nodes/Modifier.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.nodes; 2 | 3 | /** 4 | * 5 | * @author manos schinas 6 | * 7 | * A modifier has the following syntax in EQL 8 | * /modifier comparitor value 9 | * 10 | * Example: or/count>5 11 | * 12 | * Node or is modified by count>5 13 | * modifier: count 14 | * comparitor: > 15 | * value: 5 16 | * 17 | */ 18 | public class Modifier extends Node { 19 | 20 | private String 
modifier; 21 | private String comparitor; 22 | private String value; 23 | 24 | public Modifier(String modifier) { 25 | this.modifier = modifier; 26 | } 27 | 28 | public Modifier(String modifier, String comparitor, String value) { 29 | this.modifier = modifier; 30 | this.comparitor = comparitor; 31 | this.value = value; 32 | } 33 | 34 | public String getModifier() { 35 | return modifier; 36 | } 37 | 38 | public void setModifier(String modifier) { 39 | this.modifier = modifier; 40 | } 41 | 42 | public String getComparitor() { 43 | return comparitor; 44 | } 45 | 46 | public void setComparitor(String comparitor) { 47 | this.comparitor = comparitor; 48 | } 49 | 50 | public String getValue() { 51 | return value; 52 | } 53 | 54 | public void setValue(String value) { 55 | this.value = value; 56 | } 57 | 58 | public boolean hasValue() { 59 | return (value != null && comparitor != null); 60 | } 61 | 62 | public boolean valueEquals(String value) { 63 | return value.equals(this.value); 64 | } 65 | 66 | public boolean isComparitorLT() { 67 | return hasValue() && comparitor.equals("<"); 68 | } 69 | 70 | public boolean isComparitorLTE() { 71 | return hasValue() && comparitor.equals("<="); 72 | } 73 | 74 | public boolean isComparitorGT() { 75 | return hasValue() && comparitor.equals(">"); 76 | } 77 | 78 | public boolean isComparitorGTE() { 79 | return hasValue() && comparitor.equals(">="); 80 | } 81 | 82 | public boolean isComparitorEQ() { 83 | return hasValue() && comparitor.equals("="); 84 | } 85 | 86 | @Override 87 | public String toString() { 88 | return "/" + (hasValue() ? 
modifier + comparitor + value : modifier); 89 | } 90 | 91 | @Override 92 | public boolean hasChildren() { 93 | return false; 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/nodes/Node.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.nodes; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | /** 7 | * @author manosetro 8 | * 9 | * Node class represents a single node in the syntax tree. 10 | * Node class is extended by any other class in the syntax tree. 11 | * 12 | */ 13 | public abstract class Node { 14 | 15 | protected Node parent; // the parent node of the current node 16 | 17 | protected List children = new ArrayList(); // a list of children 18 | 19 | protected List errors = new ArrayList(); // a list of nodes, correspond to syntax error 20 | 21 | protected int depth; // the depth of that node in the syntax tree 22 | 23 | protected boolean valid = true; // indicates whether that node is valid or not 24 | 25 | public abstract boolean hasChildren(); 26 | 27 | public Node getParent() { 28 | return parent; 29 | } 30 | 31 | public void setParent(Node parent) { 32 | this.parent = parent; 33 | } 34 | 35 | public int getDepth() { 36 | return depth; 37 | } 38 | 39 | public void setDepth(int depth) { 40 | this.depth = depth; 41 | } 42 | 43 | public int getChildCount() { 44 | return children.size(); 45 | } 46 | 47 | public Node getChild(int index) { 48 | return children.get(index); 49 | } 50 | 51 | public boolean addError(Node error) { 52 | return errors.add(error); 53 | } 54 | 55 | public List getErrors() { 56 | return errors; 57 | } 58 | 59 | public Node getError(int index) { 60 | return errors.get(index); 61 | } 62 | 63 | public boolean isValid() { 64 | return valid; 65 | } 66 | 67 | public void setValid(boolean valid) { 68 | this.valid = valid; 69 | } 70 | 71 | } 72 | 
--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/eql/tree/nodes/Operator.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.eql.tree.nodes;

/**
 *
 * @author manos schinas
 *
 * Operator node that can take 4 values: or, and, prox, not.
 *
 */
public class Operator extends Modified {

    private String operator;

    public Operator(String operator) {
        this.operator = operator;
    }

    public String getOperator() {
        return operator;
    }

    public void setOperator(String operator) {
        this.operator = operator;
    }

    // The checks below are case-sensitive and false when no operator is set.

    public boolean isOr() {
        return "or".equals(operator);
    }

    public boolean isAnd() {
        return "and".equals(operator);
    }

    public boolean isNot() {
        return "not".equals(operator);
    }

    public boolean isProx() {
        return "prox".equals(operator);
    }

    /** @return the operator name followed by the string form of its modifiers */
    @Override
    public String toString() {
        StringBuilder buffer = new StringBuilder();
        buffer.append(operator);
        buffer.append(super.toString());
        return buffer.toString();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/eql/tree/nodes/PrefixClause.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.eql.tree.nodes;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.iptc.extra.core.eql.tree.extra.EQLOperator;

/**
 *
 * @author manos schinas
 *
 * PrefixClause corresponds to a statement that combines other statements with one of the four operators.
 *
 * PrefixClause <- (Operator
 *                      Clause+
 *                 )
 *
 */
public class PrefixClause extends Clause {

    private Operator operator;          // the operator: or, and, not, prox
    private EQLOperator eqlOperator;    // the EQL operator

    private List<Clause> clauses = new ArrayList<>(); // the sub-clauses of that prefix clause

    public Operator getOperator() {
        return operator;
    }

    /** Sets the operator and also appends it to this node's children. */
    public void setOperator(Operator operator) {
        this.operator = operator;
        children.add(operator);
    }

    public EQLOperator getEQLOperator() {
        return eqlOperator;
    }

    public void setEQLOperator(EQLOperator validOperator) {
        this.eqlOperator = validOperator;
    }

    /** @return the internal list of sub-clauses (not a copy) */
    public List<Clause> getClauses() {
        return clauses;
    }

    public Clause getClause(int index) {
        return clauses.get(index);
    }

    /*
     * Get only search clauses
     */
    public List<SearchClause> getSearchClause() {
        List<SearchClause> searchClauses = new ArrayList<>();
        for(Clause clause : clauses) {
            if(clause instanceof SearchClause) {
                searchClauses.add((SearchClause) clause);
            }
        }
        return searchClauses;
    }

    /*
     * Get prefix or search clauses
     */
    public List<Clause> getPrefixOrSearchClause() {
        List<Clause> validClauses = new ArrayList<>();
        for(Clause clause : clauses) {
            if(clause instanceof SearchClause || clause instanceof PrefixClause) {
                validClauses.add(clause);
            }
        }
        return validClauses;
    }

    /** Replaces the sub-clauses and also appends all of them to this node's children. */
    public void setClauses(List<Clause> clauses) {
        this.clauses = clauses;

        children.addAll(clauses);
    }

    @Override
    public boolean hasChildren() {
        return true;
    }

    /** @return "(" + operator + " " + the sub-clauses joined by single spaces + ")" */
    @Override
    public String toString() {
        StringBuilder buffer = new StringBuilder();
        buffer.append("(");
        buffer.append(operator);
        buffer.append(" ");
        buffer.append(StringUtils.join(clauses, " "));
        buffer.append(")");
        return buffer.toString();
    }

}
--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/eql/tree/nodes/ReferenceClause.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.eql.tree.nodes;

import org.iptc.extra.core.eql.tree.SyntaxTree;
import org.iptc.extra.core.types.Rule;

/**
 *
 * @author manos schinas
 *
 * A reference clause corresponds to another referenced rule
 *
 * Example:
 * (or
 *    (title any "term1 term2")
 *    (@ref == 5954d2231bac0c2f382b7ca5)
 * )
 *
 */
public class ReferenceClause extends Clause {

    private String ruleId;              // the id of the referenced rule

    private Rule rule;                  // the referenced rule

    private SyntaxTree ruleSyntaxTree;  // the syntax tree of the referenced rule


    public String getRuleId() {
        return ruleId;
    }

    public void setRuleId(String ruleId) {
        this.ruleId = ruleId;
    }

    public Rule getRule() {
        return rule;
    }

    public void setRule(Rule rule) {
        this.rule = rule;
    }

    public SyntaxTree getRuleSyntaxTree() {
        return ruleSyntaxTree;
    }

    public void setRuleSyntaxTree(SyntaxTree ruleSyntaxTree) {
        this.ruleSyntaxTree = ruleSyntaxTree;
    }

    /** @return the clause in EQL form: ( @ref == ruleId ) */
    @Override
    public String toString() {
        return "( @ref == " + ruleId + " )";
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/eql/tree/nodes/Relation.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.eql.tree.nodes;

/**
 *
 * @author manos schinas
 *
 * Part of search clauses:
 *
 *  Index   Relation    SearchTerm
 *    |        |            |
 *  title     any     "term1 term2"
 *
 */
public class Relation extends Modified {

    private String relation;

    public Relation(String relation) {
        this.relation = relation;
    }

    public String getRelation() {
        return relation;
    }

    public void setRelation(String relation) {
        this.relation = relation;
    }

    @Override
    public boolean hasChildren() {
        return false;
    }

    /**
     * @param relation the relation text to compare with (case-sensitive)
     * @return true when the given text is non-null and equals this node's relation;
     *         false otherwise, including when this node has no relation set
     */
    public boolean is(String relation) {
        return relation != null && relation.equals(this.relation);
    }

    /** @return the relation text followed by the string form of its modifiers */
    @Override
    public String toString() {
        StringBuilder buffer = new StringBuilder();
        buffer.append(relation);
        buffer.append(super.toString());
        return buffer.toString();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/eql/tree/nodes/SearchClause.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.eql.tree.nodes;

import java.util.ArrayList;
import java.util.List;

/**
 *
 * @author manos schinas
 *
 * A search clause consists of
 *      Index Relation SearchTerm
 *
 * Index and Relation are optional parts.
 * When missing, SearchTerm can match any field.
 *
 */
public class SearchClause extends Clause {

    private Index index;

    private Relation relation;

    private SearchTerm searchTerm;

    public SearchClause() {

    }

    public SearchClause(SearchTerm searchTerm) {
        super();
        this.searchTerm = searchTerm;

        children.add(searchTerm);
    }

    public SearchClause(Index index, Relation relation, SearchTerm searchTerm) {
        super();
        this.index = index;
        this.relation = relation;
        this.searchTerm = searchTerm;

        children.add(index);
        children.add(relation);
        children.add(searchTerm);
    }

    public Index getIndex() {
        return index;
    }

    /** Sets the index and also appends it to this node's children. */
    public void setIndex(Index index) {
        this.index = index;
        children.add(index);
    }

    public Relation getRelation() {
        return relation;
    }

    /** Sets the relation and also appends it to this node's children. */
    public void setRelation(Relation relation) {
        this.relation = relation;
        children.add(relation);
    }

    public SearchTerm getSearchTerm() {
        return searchTerm;
    }

    /** Sets the search term and also appends it to this node's children. */
    public void setSearchTerm(SearchTerm searchTerm) {
        this.searchTerm = searchTerm;
        children.add(searchTerm);
    }

    /** @return true only when both index and relation are present */
    public boolean hasIndex() {
        return (index != null && relation != null);
    }

    /** @return "index relation searchTerm", or only the search term when index/relation are missing */
    @Override
    public String toString() {
        // plain concatenation instead of searchTerm.toString(): a clause built with the
        // no-arg constructor must not throw from toString()
        return (hasIndex() ? index + " " + relation + " " : "") + searchTerm;
    }

    /**
     * Splits this clause into one clause per term of its search term. Every new clause
     * shares this clause's index and relation objects.
     *
     * @return the list of single-term clauses, in term order
     */
    public List<SearchClause> splitSearchClause() {
        List<SearchClause> searchClauses = new ArrayList<>();

        for(String term : searchTerm.getTerms()) {
            SearchTerm st = new SearchTerm(term);
            SearchClause searchClause = new SearchClause(index, relation, st);

            searchClauses.add(searchClause);
        }

        return searchClauses;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/eql/tree/nodes/SearchTerm.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.eql.tree.nodes;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

/**
 *
 * @author manos schinas
 *
 * Part of search clauses:
 *
 *  Index   Relation    SearchTerm
 *    |        |            |
 *  title     any     "term1 term2"
 *  title     any     "\$[0-9]\s+(million|billion)?"
 */
public class SearchTerm extends Node {

    // regular expression characters
    private static final String[] regexpCharacters = {".", "+", "|", "{", "}", "[", "]", "(", ")", "\"", "\\"};

    // wildcard characters
    private static final String[] wildcards = {"+", "*"};

    // a term that has whitespace with at least one character on each side
    private static final Pattern MULTI_WORD = Pattern.compile(".+\\s+.+");

    // separator used to break a multi-word term into its parts
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    private List<String> terms = new ArrayList<>(); // terms of the search term

    public SearchTerm() {

    }

    public SearchTerm(String term) {
        this.terms.add(term);
    }

    public SearchTerm(List<String> terms) {
        this.terms.addAll(terms);
    }

    /** @return the internal list of terms (not a copy) */
    public List<String> getTerms() {
        return terms;
    }

    public void setTerms(List<String> terms) {
        this.terms = terms;
    }

    /*
     * Returns a representation of search term by concatenating single terms
     * with a space; a slash surrounded by spaces (" / ") is collapsed to "/".
     */
    public String getSearchTerm() {
        String searchTerm = StringUtils.join(terms, " ");
        if(searchTerm != null && searchTerm.contains("/")) {
            searchTerm = searchTerm.replace(" / ", "/");
        }

        return searchTerm;
    }

    /*
     * Builds a query string out of the terms, each one preceded by the given prefix:
     *  - a term containing a regular expression character becomes prefix(/term/)
     *  - a term made of several whitespace separated parts becomes prefix(+part1 +part2 ...)
     *  - any other term becomes prefix followed by the term
     * The resulting pieces are joined with a space.
     */
    public String getQueryString(String prefix) {
        List<String> queryParts = new ArrayList<>();
        for(String term : terms) {
            if(isRegexp(term)) {
                queryParts.add(prefix + "(/" + term + "/)");
            }
            else {
                if(MULTI_WORD.matcher(term).matches()) {
                    List<String> termParts = new ArrayList<>();
                    for(String termPart : WHITESPACE.split(term)) {
                        termParts.add("+" + termPart);
                    }
                    term = "(" + StringUtils.join(termParts, " ") + ")";
                }
                queryParts.add(prefix + term);
            }
        }

        return StringUtils.join(queryParts, " ");
    }

    @Override
    public boolean hasChildren() {
        return false;
    }

    /** @return the search term in double quotes; regular expressions are printed via getRegexp(true) */
    @Override
    public String toString() {
        if(isRegexp()) {
            return "\"" + getRegexp(true) + "\"";
        }

        return "\"" + getSearchTerm() + "\"";
    }

    public int numberOfTerms() {
        return terms.size();
    }

    public String getTerm(int index) {
        return terms.get(index);
    }

    // Does search term contains wild-cards?
    public boolean hasWildCards() {
        for(String term : terms) {
            if(hasWildCards(term)) {
                return true;
            }
        }
        return false;
    }

    private boolean hasWildCards(String term) {
        for(String wildcard : wildcards) {
            if(term.contains(wildcard)) {
                return true;
            }
        }

        return false;
    }

    // Is search term a regular expression? True when any term contains a regular expression character.
    public boolean isRegexp() {
        for(String term : terms) {
            if(isRegexp(term)) {
                return true;
            }
        }
        return false;
    }

    private boolean isRegexp(String term) {
        for(String regexCharacter : regexpCharacters) {
            if(term.contains(regexCharacter)) {
                return true;
            }
        }

        return false;
    }

    // check whether the regular expression contains a whitespace character class (\s)
    public boolean doesRegexpContainWhitespaces() {
        String regexp = StringUtils.join(terms, "");
        return regexp != null && regexp.contains("\\s");
    }

    /*
     * Get regular expression string: the terms concatenated without separator.
     *
     * When predefinedCharacterClasses is false, the shorthand classes \d \D \s \S \w \W
     * are expanded to explicit bracket expressions and the result is wrapped in .* on
     * both sides. When it is true, the concatenated terms are returned unchanged.
     */
    public String getRegexp(boolean predefinedCharacterClasses) {
        String regexp = StringUtils.join(terms, "");

        if(!predefinedCharacterClasses && regexp != null) {
            regexp = regexp
                    .replace("\\d", "[0-9]")
                    .replace("\\D", "[^0-9]")
                    .replace("\\s", "[ \\t\\n\\x0B\\f\\r]")
                    .replace("\\S", "[^ \\t\\n\\x0B\\f\\r]")
                    .replace("\\w", "[a-zA-Z_0-9]")
                    // upper-case W: the negated word class (used to repeat "\\w", so \W was never expanded)
                    .replace("\\W", "[^a-zA-Z_0-9]");

            regexp = ".*" + regexp + ".*";
        }

        return regexp;
    }

}
-------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/utils/TreeUtils.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.utils; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.List; 8 | import java.util.Map; 9 | import java.util.Set; 10 | 11 | import org.iptc.extra.core.eql.EQLParser; 12 | import org.iptc.extra.core.eql.tree.SyntaxTree; 13 | import org.iptc.extra.core.eql.tree.nodes.Clause; 14 | import org.iptc.extra.core.eql.tree.nodes.CommentClause; 15 | import org.iptc.extra.core.eql.tree.nodes.Index; 16 | import org.iptc.extra.core.eql.tree.nodes.Node; 17 | import org.iptc.extra.core.eql.tree.nodes.Operator; 18 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 19 | import org.iptc.extra.core.eql.tree.nodes.ReferenceClause; 20 | import org.iptc.extra.core.eql.tree.nodes.Relation; 21 | import org.iptc.extra.core.eql.tree.nodes.SearchClause; 22 | import org.iptc.extra.core.eql.tree.nodes.SearchTerm; 23 | import org.iptc.extra.core.eql.tree.visitor.SyntaxTreeVisitor; 24 | import org.iptc.extra.core.types.Rule; 25 | import org.iptc.extra.core.types.Schema; 26 | 27 | import edu.stanford.nlp.util.StringUtils; 28 | 29 | /** 30 | * 31 | * @author manos schinas 32 | * 33 | * This class contains a set of static methods, for the processing of syntax true produced by EQL parser 34 | * 35 | */ 36 | public class TreeUtils { 37 | 38 | // Checks whether the tree starting to root node is valid 39 | public static boolean isTreeValid(Node root) { 40 | // iterate over relation nodes. Return false (rule is invalid) if any relation is invalid 41 | for(Relation relation : getRelations(root)) { 42 | if(!relation.isValid()) { 43 | return false; 44 | } 45 | } 46 | 47 | // iterate over operator nodes. 
Return false (rule is invalid) if any operator is invalid 48 | for(Operator operator : getOperators(root)) { 49 | if(!operator.isValid()) { 50 | return false; 51 | } 52 | } 53 | return true; 54 | } 55 | 56 | // Iterate over relation and operator nodes and return any invalid node 57 | public static List getInvalidNodes(Node root) { 58 | List nodes = new ArrayList(); 59 | for(Relation relation : getRelations(root)) { 60 | if(!relation.isValid()) { 61 | nodes.add(relation); 62 | } 63 | } 64 | for(Operator operator : getOperators(root)) { 65 | if(!operator.isValid()) { 66 | nodes.add(operator); 67 | } 68 | } 69 | 70 | return nodes; 71 | } 72 | 73 | /* 74 | * checks whether the tree, starting at root, matches the given schema. Returns invalid fields/indices 75 | */ 76 | public static Set validateSchema(Node root, Schema schema) { 77 | SyntaxTreeVisitor> visitor = new SyntaxTreeVisitor>() { 78 | public Set visitIndex(Index index) { 79 | Set indices = new HashSet(); 80 | 81 | if(!schema.getFieldNames().contains(index.getName()) && !"text_content".equals(index.getName())) { 82 | index.setValid(false); 83 | indices.add(index.getName()); 84 | } 85 | return indices; 86 | } 87 | 88 | protected Set aggregateResult(Set aggregate, Set nextResult) { 89 | aggregate.addAll(nextResult); 90 | return aggregate; 91 | } 92 | 93 | protected Set defaultResult() { 94 | return new HashSet(); 95 | } 96 | }; 97 | 98 | if(schema == null) { 99 | return new HashSet(); 100 | } 101 | 102 | Set indices = visitor.visit(root); 103 | return indices; 104 | 105 | } 106 | 107 | /* 108 | * Validate tree across multiple schemas 109 | */ 110 | public static Map> validateSchema(Node root, List schemas) { 111 | 112 | Map> map = new HashMap>(); 113 | for(Schema schema : schemas) { 114 | Set fields = schema.getFieldNames(); 115 | Set indices = getIndices(root); 116 | 117 | indices.retainAll(fields); 118 | 119 | map.put(schema.getId(), indices); 120 | } 121 | 122 | return map; 123 | } 124 | 125 | /* 126 | * 
Traverse the tree and returns a set of indices 127 | */ 128 | public static Set getIndices(Node root) { 129 | SyntaxTreeVisitor> visitor = new SyntaxTreeVisitor>() { 130 | 131 | public Set visitIndex(Index index) { 132 | Set indices = new HashSet(); 133 | indices.add(index.getName()); 134 | 135 | return indices; 136 | } 137 | 138 | @Override 139 | protected Set aggregateResult(Set aggregate, Set nextResult) { 140 | aggregate.addAll(nextResult); 141 | return aggregate; 142 | } 143 | 144 | @Override 145 | protected Set defaultResult() { 146 | return new HashSet(); 147 | } 148 | 149 | }; 150 | 151 | Set indices = visitor.visit(root); 152 | return indices; 153 | } 154 | 155 | /* 156 | * Traverse the tree and returns a set of relation nodes 157 | */ 158 | public static Set getRelations(Node root) { 159 | SyntaxTreeVisitor> visitor = new SyntaxTreeVisitor>() { 160 | public Set visitRelation(Relation relation) { 161 | Set relations = new HashSet(); 162 | relations.add(relation); 163 | 164 | return relations; 165 | } 166 | 167 | protected Set aggregateResult(Set aggregate, Set nextResult) { 168 | aggregate.addAll(nextResult); 169 | return aggregate; 170 | } 171 | 172 | protected Set defaultResult() { 173 | return new HashSet(); 174 | } 175 | }; 176 | 177 | Set relations = visitor.visit(root); 178 | return relations; 179 | } 180 | 181 | /* 182 | * Traverse the tree and returns a set of nodes, corresponding to search clauses: 183 | * 184 | * SearchClause: Index Relation SearchTerm 185 | * 186 | * e.g. 
title any "term1 term2" 187 | * 188 | */ 189 | public static Set getSearchClauses(Node root) { 190 | SyntaxTreeVisitor> visitor = new SyntaxTreeVisitor>() { 191 | public Set visitSearchClause(SearchClause searchClause) { 192 | Set searchClauses = new HashSet(); 193 | searchClauses.add(searchClause); 194 | 195 | return searchClauses; 196 | } 197 | 198 | protected Set aggregateResult(Set aggregate, Set nextResult) { 199 | aggregate.addAll(nextResult); 200 | return aggregate; 201 | } 202 | 203 | protected Set defaultResult() { 204 | return new HashSet(); 205 | } 206 | }; 207 | 208 | Set searchClauses = visitor.visit(root); 209 | return searchClauses; 210 | } 211 | 212 | /* 213 | * Traverse the tree and returns a set of nodes, corresponding to search clauses having no idnex and relation 214 | * 215 | * e.g. "term1 term2" 216 | * 217 | */ 218 | public static Set getSearchTermClauses(Node root) { 219 | SyntaxTreeVisitor> visitor = new SyntaxTreeVisitor>() { 220 | public Set visitSearchClause(SearchClause searchClause) { 221 | Set searchClauses = new HashSet(); 222 | 223 | if(!searchClause.hasIndex()) { 224 | searchClauses.add(searchClause); 225 | } 226 | 227 | return searchClauses; 228 | } 229 | 230 | protected Set aggregateResult(Set aggregate, Set nextResult) { 231 | aggregate.addAll(nextResult); 232 | return aggregate; 233 | } 234 | 235 | protected Set defaultResult() { 236 | return new HashSet(); 237 | } 238 | }; 239 | 240 | Set searchClauses = visitor.visit(root); 241 | return searchClauses; 242 | } 243 | 244 | /* 245 | * Traverse the tree and returns a set of operator nodes 246 | */ 247 | public static Set getOperators(Node root) { 248 | SyntaxTreeVisitor> visitor = new SyntaxTreeVisitor>() { 249 | public Set visitOperator(Operator operator) { 250 | Set operators = new HashSet(); 251 | operators.add(operator); 252 | 253 | return operators; 254 | } 255 | 256 | protected Set aggregateResult(Set aggregate, Set nextResult) { 257 | aggregate.addAll(nextResult); 258 | 
return aggregate; 259 | } 260 | 261 | protected Set defaultResult() { 262 | return new HashSet(); 263 | } 264 | }; 265 | 266 | Set operators = visitor.visit(root); 267 | return operators; 268 | } 269 | 270 | /* 271 | * iterates over a list of clauses and checks whether all of them are search clauses 272 | */ 273 | public static boolean areSearchClauses(Collection clauses) { 274 | for(Clause clause : clauses) { 275 | if(clause instanceof CommentClause) { 276 | continue; 277 | } 278 | if(clause instanceof PrefixClause) { 279 | return false; 280 | } 281 | } 282 | return true; 283 | } 284 | 285 | /* 286 | * iterates over a list of clauses and checks whether all of them are search clauses without index and relation 287 | */ 288 | public static boolean areSearchTermClauses(Collection clauses) { 289 | for(Clause clause : clauses) { 290 | if(clause instanceof CommentClause) { 291 | continue; 292 | } 293 | if(clause instanceof PrefixClause) { 294 | return false; 295 | } 296 | if(clause instanceof ReferenceClause) { 297 | return false; 298 | } 299 | if(clause instanceof SearchClause) { 300 | SearchClause searchClause = (SearchClause) clause; 301 | if(searchClause.hasIndex()) { 302 | return false; 303 | } 304 | } 305 | } 306 | return true; 307 | } 308 | 309 | /* 310 | * Iterates over a list of clauses and create a SearchTerm produced by the concatenation of underlying SearchTerms. 
311 | * 312 | * Clauses other than search clauses are ignored 313 | * 314 | */ 315 | public static SearchTerm mergeSearchTerm(List clauses) { 316 | SearchTerm mergedSearchTerm = new SearchTerm(); 317 | 318 | List mergedTerms = new ArrayList(); 319 | for(Clause clause : clauses) { 320 | if(clause instanceof SearchClause) { // use only SearchClause statements 321 | SearchClause searchClause = (SearchClause) clause; 322 | 323 | SearchTerm searchTerms = searchClause.getSearchTerm(); 324 | if(searchTerms.isRegexp()) { 325 | mergedTerms.add(searchTerms.getRegexp(false)); 326 | } 327 | else { 328 | List terms = searchTerms.getTerms(); 329 | mergedTerms.add(StringUtils.join(terms, " ")); 330 | } 331 | } 332 | } 333 | 334 | mergedSearchTerm.setTerms(mergedTerms); 335 | return mergedSearchTerm; 336 | } 337 | 338 | /* 339 | * Traverse the tree and returns a set of index nodes 340 | */ 341 | public static List getIndices(List searchClauses) { 342 | Set set = new HashSet(); 343 | for(SearchClause searchClause : searchClauses) { 344 | if(searchClause.hasIndex()) { 345 | set.add(searchClause.getIndex().getName()); 346 | } 347 | } 348 | 349 | return new ArrayList(set); 350 | } 351 | 352 | /* 353 | * Traverse the tree and aggregate reference clauses 354 | */ 355 | public static List getReferences(Node root) { 356 | SyntaxTreeVisitor> visitor = new SyntaxTreeVisitor>() { 357 | 358 | public List visitReferenceClause(ReferenceClause reference) { 359 | List references = new ArrayList(); 360 | references.add(reference); 361 | 362 | return references; 363 | } 364 | 365 | protected List aggregateResult(List aggregate, List nextResult) { 366 | aggregate.addAll(nextResult); 367 | return aggregate; 368 | } 369 | 370 | protected List defaultResult() { 371 | return new ArrayList(); 372 | } 373 | }; 374 | 375 | List references = visitor.visit(root); 376 | return references; 377 | } 378 | 379 | // validate a rule references inside another rule 380 | public static void 
validateReferenceRule(ReferenceClause reference, Schema schema) { 381 | 382 | Rule rule = reference.getRule(); 383 | 384 | String query = rule.getQuery(); 385 | 386 | SyntaxTree syntaxTree = EQLParser.parse(query); 387 | reference.setRuleSyntaxTree(syntaxTree); 388 | 389 | //Node root = syntaxTree.getRootNode(); 390 | 391 | //List invalidNodes = ExtraValidator.validate(root, schema); 392 | //Set unmatchedIndices = TreeUtils.validateSchema(root, schema); 393 | //List references = TreeUtils.getReferences(root); 394 | } 395 | 396 | public static boolean isRuleValid(Rule rule, Schema schema) { 397 | String query = rule.getQuery(); 398 | 399 | SyntaxTree syntaxTree = EQLParser.parse(query); 400 | Node root = syntaxTree.getRootNode(); 401 | 402 | if(syntaxTree.hasErrors() || root == null) { 403 | return false; 404 | } 405 | 406 | return true; 407 | 408 | //List invalidNodes = ExtraValidator.validate(root, schema); 409 | //Set unmatchedIndices = TreeUtils.validateSchema(root, schema); 410 | //List references = TreeUtils.getReferences(root); 411 | } 412 | 413 | } 414 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/visitor/EQL2HTMLVisitor.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.visitor; 2 | 3 | import org.iptc.extra.core.eql.tree.nodes.Clause; 4 | import org.iptc.extra.core.eql.tree.nodes.CommentClause; 5 | import org.iptc.extra.core.eql.tree.nodes.Index; 6 | import org.iptc.extra.core.eql.tree.nodes.Operator; 7 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 8 | import org.iptc.extra.core.eql.tree.nodes.ReferenceClause; 9 | import org.iptc.extra.core.eql.tree.nodes.Relation; 10 | import org.iptc.extra.core.eql.tree.nodes.SearchClause; 11 | import org.iptc.extra.core.eql.tree.nodes.SearchTerm; 12 | 13 | /** 14 | * @author manosetro - Manos Schinas 15 | * 16 | * EXTRA2ESQueryVisitor performs a 
depth-first traversal of the syntax tree and generates an HTML representation. 17 | * 18 | * Example rule: 19 | * 20 | * (or 21 | * (title any "term1 term2") 22 | * (and 23 | * (body adj "term3 term4") 24 | * (body any "term5") 25 | * ) 26 | * ) 27 | * 28 | * Transformed to the following HTML section: 29 | * 30 | *
( 31 | *
or
32 | *
( 33 | *
title
34 | *
any
35 | *
"term1 term2"
36 | * )
37 | *
( 38 | *
and
39 | *
( 40 | *
body
41 | *
adj
42 | *
"term3 term4"
43 | * )
44 | *
( 45 | *
body
46 | *
any
47 | *
"term5"
48 | * )
49 | * )
50 | * )
51 | */ 52 | public class EQL2HTMLVisitor extends SyntaxTreeVisitor { 53 | 54 | private String htmlTag; 55 | 56 | public EQL2HTMLVisitor(String htmlTag) { 57 | this.htmlTag = htmlTag; 58 | } 59 | 60 | @Override 61 | public String visitPrefixClause(PrefixClause prefixClause) { 62 | StringBuffer buffer = new StringBuffer(); 63 | 64 | buffer.append("<" + htmlTag + " class=\"prefixClause\" data-depth=\"" + prefixClause.getDepth() + "\"" + 65 | (prefixClause.getEQLOperator()!=null ? " operator=\"" + prefixClause.getEQLOperator() + "\"" : "") + 66 | ">"); 67 | 68 | buffer.append("("); 69 | buffer.append(visit(prefixClause.getOperator())); 70 | 71 | for(Clause clause : prefixClause.getClauses()) { 72 | buffer.append(visit(clause)); 73 | } 74 | 75 | buffer.append(")"); 76 | buffer.append(""); 77 | 78 | return buffer.toString(); 79 | } 80 | 81 | @Override 82 | public String visitOperator(Operator operator) { 83 | StringBuffer buffer = new StringBuffer(); 84 | 85 | buffer.append("<" + htmlTag + " class=\"booleanOp\" data-valid=\"" + operator.isValid() + "\" " 86 | + "data-depth=\"" + operator.getDepth() + "\">"); 87 | buffer.append(operator); 88 | buffer.append(""); 89 | 90 | return buffer.toString(); 91 | } 92 | 93 | @Override 94 | public String visitSearchClause(SearchClause searchClause) { 95 | StringBuffer buffer = new StringBuffer(); 96 | buffer.append("<" + htmlTag + " class=\"searchClause\" data-depth=\"" + searchClause.getDepth() + "\">("); 97 | 98 | if(searchClause.hasIndex()) { 99 | buffer.append(visit(searchClause.getIndex())); 100 | buffer.append(visit(searchClause.getRelation())); 101 | } 102 | buffer.append(visit(searchClause.getSearchTerm())); 103 | 104 | buffer.append(") "); 105 | return buffer.toString(); 106 | } 107 | 108 | @Override 109 | public String visitCommentClause(CommentClause commentClause) { 110 | return ""; 111 | } 112 | 113 | @Override 114 | public String visitReferenceClause(ReferenceClause referenceClause) { 115 | StringBuffer buffer = new 
StringBuffer(); 116 | buffer.append("<" + htmlTag + " data-depth=\"" + referenceClause.getDepth() + "\"> ("); 117 | 118 | buffer.append("<" + htmlTag + " class=\"referenceClause\" rule=\"" + referenceClause.getRuleId() + "\"" 119 | + "data-valid=\"" + referenceClause.isValid() + "\">"); 120 | 121 | buffer.append("@ref == "); 122 | buffer.append(referenceClause.getRuleId()); 123 | 124 | buffer.append(""); 125 | 126 | buffer.append(") "); 127 | return buffer.toString(); 128 | } 129 | 130 | @Override 131 | public String visitIndex(Index index) { 132 | StringBuffer buffer = new StringBuffer(); 133 | 134 | buffer.append("<" + htmlTag + " class=\"index\" data-valid=\"" + index.isValid() + "\" " 135 | + "data-depth=\"" + index.getDepth() + "\"> "); 136 | buffer.append(index.getName()); 137 | buffer.append(" "); 138 | 139 | return buffer.toString(); 140 | } 141 | 142 | @Override 143 | public String visitRelation(Relation relation) { 144 | StringBuffer buffer = new StringBuffer(); 145 | 146 | buffer.append("<" + htmlTag + " class=\"relation\" data-valid=\"" + relation.isValid() + "\" " 147 | + "data-depth=\"" + relation.getDepth() + "\"> "); 148 | buffer.append(relation); 149 | buffer.append(" "); 150 | 151 | return buffer.toString(); 152 | } 153 | 154 | @Override 155 | public String visitSearchTerm(SearchTerm searchTerm) { 156 | StringBuffer buffer = new StringBuffer(); 157 | 158 | buffer.append("<" + htmlTag + " class=\"searchTerm\" data-depth=\"" + searchTerm.getDepth() + "\"> "); 159 | buffer.append(searchTerm); 160 | buffer.append(" "); 161 | 162 | return buffer.toString(); 163 | } 164 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/visitor/EQL2HighlightVisitor.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.visitor; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Set; 6 | 
import java.util.stream.Collectors; 7 | 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.elasticsearch.index.query.BoolQueryBuilder; 10 | import org.elasticsearch.index.query.MatchQueryBuilder; 11 | import org.elasticsearch.index.query.MultiMatchQueryBuilder; 12 | import org.elasticsearch.index.query.QueryBuilder; 13 | import org.elasticsearch.index.query.QueryStringQueryBuilder; 14 | import org.elasticsearch.index.query.RangeQueryBuilder; 15 | import org.iptc.extra.core.eql.tree.SyntaxTree; 16 | import org.iptc.extra.core.eql.tree.extra.EQLOperator; 17 | import org.iptc.extra.core.eql.tree.nodes.Clause; 18 | import org.iptc.extra.core.eql.tree.nodes.CommentClause; 19 | import org.iptc.extra.core.eql.tree.nodes.Index; 20 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 21 | import org.iptc.extra.core.eql.tree.nodes.ReferenceClause; 22 | import org.iptc.extra.core.eql.tree.nodes.Relation; 23 | import org.iptc.extra.core.eql.tree.nodes.SearchClause; 24 | import org.iptc.extra.core.eql.tree.nodes.SearchTerm; 25 | import org.iptc.extra.core.types.Schema; 26 | 27 | import static org.elasticsearch.index.query.QueryBuilders.*; 28 | 29 | public class EQL2HighlightVisitor extends SyntaxTreeVisitor { 30 | 31 | private Schema schema; 32 | 33 | public EQL2HighlightVisitor(Schema schema) { 34 | this.schema = schema; 35 | } 36 | 37 | @Override 38 | public QueryBuilder visitPrefixClause(PrefixClause prefixClause) { 39 | EQLOperator extraOperator = prefixClause.getEQLOperator(); 40 | 41 | if(extraOperator == null) { 42 | return null; 43 | } 44 | 45 | if(extraOperator == EQLOperator.AND) { 46 | return andToES(prefixClause); 47 | } 48 | 49 | if(extraOperator == EQLOperator.NOT_IN_PHRASE) { 50 | List clauses = prefixClause.getPrefixOrSearchClause(); 51 | if(clauses.isEmpty()) { 52 | return null; 53 | } 54 | 55 | return visit(clauses.get(0)); 56 | } 57 | 58 | return orToES(prefixClause); 59 | } 60 | 61 | private QueryBuilder andToES(PrefixClause prefixClause) { 
62 | List childrenClauses = prefixClause.getClauses(); 63 | childrenClauses = childrenClauses.stream().filter(clause -> !(clause instanceof CommentClause)).collect(Collectors.toList()); 64 | 65 | if(childrenClauses.size() == 1) { 66 | return visit(childrenClauses.get(0)); 67 | } 68 | 69 | BoolQueryBuilder booleanQb = boolQuery(); 70 | 71 | List mustClauses = new ArrayList(); 72 | List mustNotClauses = new ArrayList(); 73 | for(Clause clause : childrenClauses) { 74 | if(EQLOperator.isEQLOperatorClause(clause, EQLOperator.NOT)) { 75 | mustNotClauses.addAll(getChildrenClausesQueries((PrefixClause) clause)); 76 | } 77 | else { 78 | QueryBuilder queryBuilder = visit(clause); 79 | if(queryBuilder != null) { 80 | mustClauses.add(queryBuilder); 81 | } 82 | } 83 | } 84 | 85 | if(mustClauses.isEmpty()) { 86 | return null; 87 | } 88 | 89 | for(QueryBuilder stqb : mustClauses) { 90 | booleanQb.must(stqb); 91 | } 92 | 93 | for(QueryBuilder stqb : mustNotClauses) { 94 | booleanQb.mustNot(stqb); 95 | } 96 | 97 | return booleanQb; 98 | } 99 | 100 | private QueryBuilder orToES(PrefixClause prefixClause) { 101 | 102 | List childrenClauses = prefixClause.getClauses(); 103 | childrenClauses = childrenClauses.stream().filter(clause -> !(clause instanceof CommentClause)).collect(Collectors.toList()); 104 | 105 | if(childrenClauses.size() == 1) { 106 | return visit(childrenClauses.get(0)); 107 | } 108 | 109 | BoolQueryBuilder booleanQb = boolQuery(); 110 | List clausesQueries = getClausesQueries(childrenClauses); 111 | if(clausesQueries.isEmpty()) { 112 | return null; 113 | } 114 | 115 | for(QueryBuilder stqb : clausesQueries) { 116 | booleanQb.should(stqb); 117 | } 118 | 119 | return booleanQb; 120 | } 121 | 122 | private List getClausesQueries(List clauses) { 123 | List clausesQueries = new ArrayList(); 124 | for(Clause clause : clauses) { 125 | QueryBuilder queryBuilder = visit(clause); 126 | if(queryBuilder == null) { 127 | continue; 128 | } 129 | clausesQueries.add(queryBuilder); 
130 | } 131 | return clausesQueries; 132 | } 133 | 134 | private List getChildrenClausesQueries(PrefixClause prefixClause) { 135 | List childrenClauses = prefixClause.getClauses(); 136 | List clausesQueries = new ArrayList(); 137 | for(Clause clause : childrenClauses) { 138 | QueryBuilder queryBuilder = visit(clause); 139 | if(queryBuilder == null) { 140 | continue; 141 | } 142 | clausesQueries.add(queryBuilder); 143 | } 144 | return clausesQueries; 145 | } 146 | 147 | @Override 148 | public QueryBuilder visitSearchClause(SearchClause searchClause) { 149 | if(searchClause.hasIndex()) { 150 | Index index = searchClause.getIndex(); 151 | Relation relation = searchClause.getRelation(); 152 | SearchTerm searchTerm = searchClause.getSearchTerm(); 153 | 154 | String indexName = index.getName(); 155 | return searchClausetoES(indexName, relation, searchTerm); 156 | } 157 | else { 158 | QueryBuilder qb = visitChildren(searchClause); 159 | return qb; 160 | } 161 | } 162 | 163 | @Override 164 | public QueryBuilder visitReferenceClause(ReferenceClause referenceClause) { 165 | SyntaxTree syntaxTree = referenceClause.getRuleSyntaxTree(); 166 | if(syntaxTree != null && !syntaxTree.hasErrors() && syntaxTree.getRootNode() != null) { 167 | return visit(syntaxTree.getRootNode()); 168 | } 169 | 170 | return null; 171 | } 172 | 173 | private QueryBuilder searchClausetoES(String index, Relation relation, SearchTerm searchTerm) { 174 | 175 | if(index.equals("text_content")) { 176 | BoolQueryBuilder booleanQb = boolQuery(); 177 | for(String field : schema.getTextualFieldNames()) { 178 | QueryBuilder fieldQb = searchClausetoES(field, relation, searchTerm); 179 | if(fieldQb != null) { 180 | booleanQb.should(fieldQb); 181 | } 182 | } 183 | 184 | return booleanQb; 185 | } 186 | 187 | boolean isRegexp = searchTerm.isRegexp(); 188 | boolean hasWildcards = searchTerm.hasWildCards(); 189 | 190 | String query = searchTerm.getSearchTerm(); 191 | if(isRegexp && !relation.hasModifier("literal")) { 
192 | return regexpSearchClause(index, relation, searchTerm); 193 | } 194 | 195 | if(hasWildcards && !relation.hasModifier("literal")) { 196 | return wildcardsSearchClause(index, relation, searchTerm); 197 | } 198 | 199 | if(relation.is("any") || relation.is("=")) { 200 | if(relation.hasModifier("stemming")) { 201 | index = "stemmed_" + index; 202 | } 203 | else if(relation.hasModifier("casesensitive")) { 204 | index = "case_sensitive_" + index; 205 | } 206 | else if(relation.hasModifier("literal")) { 207 | index = "literal_" + index; 208 | } 209 | 210 | return matchQuery(index, query); 211 | 212 | } 213 | else if (relation.is("==")) { 214 | return termQuery(index, query); 215 | } 216 | else if (relation.is("all")) { 217 | if(relation.hasModifier("stemming")) { 218 | index = "stemmed_" + index; 219 | } 220 | else if(relation.hasModifier("casesensitive")) { 221 | index = "case_sensitive_" + index; 222 | } 223 | else if(relation.hasModifier("literal")) { 224 | index = "literal_" + index; 225 | } 226 | 227 | MatchQueryBuilder queryBuilder = matchQuery(index, query); 228 | queryBuilder.operator(org.elasticsearch.index.query.Operator.AND); 229 | 230 | return queryBuilder; 231 | } 232 | else if (relation.is("adj")) { 233 | if(relation.hasModifier("stemming")) { 234 | index = "stemmed_" + index; 235 | } 236 | else if(relation.hasModifier("casesensitive")) { 237 | index = "case_sensitive_" + index; 238 | } 239 | else if(relation.hasModifier("literal")) { 240 | index = "literal_" + index; 241 | } 242 | 243 | return matchPhraseQuery(index, query); 244 | } 245 | else if(relation.is(">")) { 246 | RangeQueryBuilder qb = rangeQuery(index); 247 | return qb.gt(query); 248 | } 249 | else if(relation.is(">=")) { 250 | RangeQueryBuilder qb = rangeQuery(index); 251 | return qb.gte(query); 252 | } 253 | else if(relation.is("<")) { 254 | RangeQueryBuilder qb = rangeQuery(index); 255 | return qb.lt(query); 256 | } 257 | else if(relation.is("<=")) { 258 | RangeQueryBuilder qb = 
rangeQuery(index); 259 | return qb.lte(query); 260 | } 261 | else if(relation.is("within") && searchTerm.numberOfTerms() == 2) { 262 | RangeQueryBuilder qb = rangeQuery(index); 263 | return qb.gte(searchTerm.getTerm(0)).lte(searchTerm.getTerm(1)); 264 | } 265 | 266 | return null; 267 | } 268 | 269 | private QueryBuilder wildcardsSearchClause(String index, Relation relation, SearchTerm searchTerm) { 270 | 271 | String query = searchTerm.getSearchTerm(); 272 | 273 | if(relation.is("any") || relation.is("=") || relation.is("all") || relation.is("adj")) { 274 | QueryStringQueryBuilder queryBuilder = queryStringQuery(query); 275 | 276 | queryBuilder.analyzeWildcard(true); 277 | 278 | if(index.equals("")) { 279 | for(String field : schema.getTextualFieldNames()) { 280 | queryBuilder.field(field); 281 | } 282 | } 283 | else { 284 | queryBuilder.defaultField(index); 285 | } 286 | 287 | if(relation.is("all") || relation.is("adj")) { 288 | queryBuilder.defaultOperator(org.elasticsearch.index.query.Operator.AND); 289 | } 290 | 291 | return queryBuilder; 292 | } 293 | 294 | if(relation.is("==")) { 295 | return wildcardQuery(index, query); 296 | } 297 | 298 | return null; 299 | } 300 | 301 | private QueryBuilder regexpSearchClause(String index, Relation relation, SearchTerm searchTerm) { 302 | 303 | String query = searchTerm.getSearchTerm(); 304 | 305 | if(relation.is("any") || relation.is("=") || relation.is("all")) { 306 | query = StringUtils.join(searchTerm.getTerms(), ""); 307 | QueryStringQueryBuilder queryBuilder = queryStringQuery("/" + query + "/"); 308 | 309 | queryBuilder.analyzeWildcard(true); 310 | 311 | if(index.equals("")) { 312 | for(String field : schema.getTextualFieldNames()) { 313 | queryBuilder.field(field); 314 | } 315 | } 316 | else { 317 | queryBuilder.defaultField(index); 318 | } 319 | 320 | if(relation.is("all")) { 321 | queryBuilder.defaultOperator(org.elasticsearch.index.query.Operator.AND); 322 | } 323 | 324 | return queryBuilder; 325 | } 326 | 327 | 
if(relation.is("==")) { 328 | if(relation.hasModifier("regexp")) { 329 | query = StringUtils.join(searchTerm.getTerms(), ""); 330 | return regexpQuery(index, query); 331 | } 332 | else { 333 | return wildcardQuery(index, query); 334 | } 335 | } 336 | 337 | if(relation.is("adj")) { 338 | query = searchTerm.getRegexp(false); 339 | return regexpQuery("raw_" + index, query); 340 | } 341 | 342 | return null; 343 | } 344 | 345 | @Override 346 | public QueryBuilder visitSearchTerm(SearchTerm searchTerm) { 347 | Set fields = schema.getTextualFieldNames(); 348 | String[] fieldNames = fields.toArray(new String[fields.size()]); 349 | 350 | MultiMatchQueryBuilder qb = multiMatchQuery(searchTerm.getSearchTerm(), fieldNames); 351 | qb.operator(org.elasticsearch.index.query.Operator.AND); 352 | 353 | return qb; 354 | } 355 | 356 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/visitor/EQL2JSTreeVisitor.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.visitor; 2 | 3 | import org.iptc.extra.core.eql.tree.SyntaxTree; 4 | import org.iptc.extra.core.eql.tree.nodes.Clause; 5 | import org.iptc.extra.core.eql.tree.nodes.CommentClause; 6 | import org.iptc.extra.core.eql.tree.nodes.Index; 7 | import org.iptc.extra.core.eql.tree.nodes.Operator; 8 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 9 | import org.iptc.extra.core.eql.tree.nodes.ReferenceClause; 10 | import org.iptc.extra.core.eql.tree.nodes.Relation; 11 | import org.iptc.extra.core.eql.tree.nodes.SearchClause; 12 | import org.iptc.extra.core.eql.tree.nodes.SearchTerm; 13 | 14 | /** 15 | * @author manosetro - Manos Schinas 16 | * 17 | * EXTRA2ESQueryVisitor performs a depth-first traversal of the syntax tree 18 | * and generates an HTML representation that can be used by the jsTree plugin of jQuery (https://www.jstree.com/). 
19 | */ 20 | 21 | public class EQL2JSTreeVisitor extends SyntaxTreeVisitor { 22 | 23 | @Override 24 | public String visitPrefixClause(PrefixClause prefixClause) { 25 | StringBuffer buffer = new StringBuffer(); 26 | 27 | buffer.append("
  • "); 28 | 29 | 30 | buffer.append(visit(prefixClause.getOperator())); 31 | buffer.append("
      "); 32 | for(Clause clause : prefixClause.getClauses()) { 33 | buffer.append(visit(clause)); 34 | } 35 | buffer.append("
    "); 36 | 37 | buffer.append("
  • "); 38 | 39 | return buffer.toString(); 40 | } 41 | 42 | @Override 43 | public String visitOperator(Operator operator) { 44 | StringBuffer buffer = new StringBuffer(); 45 | 46 | buffer.append(""); 47 | buffer.append(operator); 48 | buffer.append(""); 49 | 50 | return buffer.toString(); 51 | } 52 | 53 | @Override 54 | public String visitSearchClause(SearchClause searchClause) { 55 | StringBuffer buffer = new StringBuffer(); 56 | buffer.append("
  • "); 57 | 58 | if(searchClause.hasIndex()) { 59 | buffer.append(visit(searchClause.getIndex())); 60 | buffer.append(visit(searchClause.getRelation())); 61 | } 62 | buffer.append(visit(searchClause.getSearchTerm())); 63 | 64 | buffer.append("
  • "); 65 | return buffer.toString(); 66 | } 67 | 68 | @Override 69 | public String visitReferenceClause(ReferenceClause referenceClause) { 70 | StringBuffer buffer = new StringBuffer(); 71 | buffer.append("
  • "); 72 | 73 | buffer.append(""); 74 | buffer.append("@ref == "); 75 | buffer.append(referenceClause.getRuleId()); 76 | buffer.append(""); 77 | 78 | SyntaxTree syntaxTree = referenceClause.getRuleSyntaxTree(); 79 | if(syntaxTree != null && syntaxTree.getRootNode() != null) { 80 | buffer.append("
      "); 81 | buffer.append(visit(syntaxTree.getRootNode())); 82 | buffer.append("
    "); 83 | } 84 | 85 | buffer.append("
  • "); 86 | return buffer.toString(); 87 | } 88 | 89 | 90 | @Override 91 | public String visitCommentClause(CommentClause commentClause) { 92 | return ""; 93 | } 94 | 95 | @Override 96 | public String visitIndex(Index index) { 97 | StringBuffer buffer = new StringBuffer(); 98 | 99 | buffer.append(" "); 100 | buffer.append(index.getName()); 101 | buffer.append(""); 102 | 103 | return buffer.toString(); 104 | } 105 | 106 | @Override 107 | public String visitRelation(Relation relation) { 108 | StringBuffer buffer = new StringBuffer(); 109 | 110 | buffer.append(" "); 111 | buffer.append(relation); 112 | buffer.append(""); 113 | 114 | return buffer.toString(); 115 | } 116 | 117 | @Override 118 | public String visitSearchTerm(SearchTerm searchTerm) { 119 | StringBuffer buffer = new StringBuffer(); 120 | 121 | buffer.append(" "); 122 | buffer.append(searchTerm); 123 | buffer.append(""); 124 | 125 | return buffer.toString(); 126 | } 127 | } -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/visitor/EQLPretifierVisitor.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.visitor; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.iptc.extra.core.eql.tree.nodes.Clause; 5 | import org.iptc.extra.core.eql.tree.nodes.CommentClause; 6 | import org.iptc.extra.core.eql.tree.nodes.Operator; 7 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 8 | import org.iptc.extra.core.eql.tree.nodes.ReferenceClause; 9 | import org.iptc.extra.core.eql.tree.nodes.SearchClause; 10 | 11 | /** 12 | * @author manosetro - Manos Schinas 13 | * 14 | * The class extends SyntaxTreeVisitor class to create a pretified version of the rule, expressed as a sytnax tree. 
15 | * Namely, this visitor visits each node of the syntax tree in a depth first fashion, 16 | * concatenates the string representation of each node into a single string, 17 | * while adds newlines and tab characters to pretify it. 18 | * 19 | */ 20 | public class EQLPretifierVisitor extends SyntaxTreeVisitor { 21 | 22 | private String newline; 23 | private String tab; 24 | 25 | public EQLPretifierVisitor(String newline, String tab) { 26 | this.newline = newline; 27 | this.tab = tab; 28 | } 29 | 30 | @Override 31 | public String visitPrefixClause(PrefixClause prefixClause) { 32 | StringBuffer buffer = new StringBuffer(); 33 | 34 | buffer.append(StringUtils.repeat(tab, prefixClause.getDepth())); 35 | buffer.append("("); 36 | 37 | Operator operator = prefixClause.getOperator(); 38 | buffer.append(operator.toString()); 39 | buffer.append(newline); 40 | 41 | for(Clause clause : prefixClause.getClauses()) { 42 | buffer.append(visit(clause)); 43 | } 44 | 45 | buffer.append(StringUtils.repeat(tab, prefixClause.getDepth())); 46 | buffer.append(")"); 47 | buffer.append(newline); 48 | 49 | return buffer.toString(); 50 | } 51 | 52 | @Override 53 | public String visitSearchClause(SearchClause searchClause) { 54 | StringBuffer buffer = new StringBuffer(); 55 | buffer.append(StringUtils.repeat(tab, searchClause.getDepth())); 56 | buffer.append("("); 57 | if(searchClause.hasIndex()) { 58 | buffer.append(searchClause.getIndex()); 59 | buffer.append(" "); 60 | buffer.append(searchClause.getRelation()); 61 | buffer.append(" "); 62 | } 63 | buffer.append(searchClause.getSearchTerm()); 64 | buffer.append(")"); 65 | buffer.append(newline); 66 | 67 | return buffer.toString(); 68 | } 69 | 70 | @Override 71 | public String visitCommentClause(CommentClause commentClause) { 72 | StringBuffer buffer = new StringBuffer(); 73 | buffer.append(StringUtils.repeat(tab, commentClause.getDepth())); 74 | buffer.append(commentClause.getComment()); 75 | buffer.append(newline); 76 | return 
buffer.toString(); 77 | } 78 | 79 | @Override 80 | public String visitReferenceClause(ReferenceClause referenceClause) { 81 | StringBuffer buffer = new StringBuffer(); 82 | buffer.append(StringUtils.repeat(tab, referenceClause.getDepth())); 83 | buffer.append("(@ref == " + referenceClause.getRuleId() + ")"); 84 | buffer.append(newline); 85 | return buffer.toString(); 86 | } 87 | 88 | } 89 | 90 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/visitor/EQLValidator.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.visitor; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Set; 6 | 7 | import org.iptc.extra.core.eql.tree.extra.EQLOperator; 8 | import org.iptc.extra.core.eql.tree.nodes.Clause; 9 | import org.iptc.extra.core.eql.tree.nodes.CommentClause; 10 | import org.iptc.extra.core.eql.tree.nodes.ErrorMessageNode; 11 | import org.iptc.extra.core.eql.tree.nodes.Index; 12 | import org.iptc.extra.core.eql.tree.nodes.Node; 13 | import org.iptc.extra.core.eql.tree.nodes.Operator; 14 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 15 | import org.iptc.extra.core.eql.tree.nodes.Relation; 16 | import org.iptc.extra.core.eql.tree.nodes.SearchClause; 17 | import org.iptc.extra.core.eql.tree.nodes.SearchTerm; 18 | import org.iptc.extra.core.eql.tree.utils.TreeUtils; 19 | import org.iptc.extra.core.types.Schema; 20 | import org.iptc.extra.core.types.Schema.Field; 21 | 22 | public class EQLValidator extends SyntaxTreeVisitor> { 23 | 24 | private Schema schema; 25 | 26 | public EQLValidator(Schema schema) { 27 | this.schema = schema; 28 | } 29 | 30 | public static List validate(Node root, Schema schema) { 31 | EQLValidator validator = new EQLValidator(schema); 32 | 33 | List invalidNodes = validator.visit(root); 34 | return invalidNodes; 35 | } 36 | 37 | @Override 38 | public List 
visitPrefixClause(PrefixClause prefixClause) { 39 | 40 | List invalidNodes = new ArrayList(); 41 | 42 | Operator operator = prefixClause.getOperator(); 43 | EQLOperator extraOperator = prefixClause.getEQLOperator(); 44 | 45 | if(extraOperator == EQLOperator.SENTENCE || extraOperator == EQLOperator.NOT_IN_SENTENCE || 46 | extraOperator == EQLOperator.PARAGRAPH || extraOperator == EQLOperator.NOT_IN_PARAGRAPH || 47 | extraOperator == EQLOperator.DISTANCE || extraOperator == EQLOperator.NOT_WITHIN_DISTANCE || 48 | extraOperator == EQLOperator.ORDER || extraOperator == EQLOperator.ORDER_AND_DISTANCE || extraOperator == EQLOperator.NOT_IN_PHRASE) { 49 | 50 | if(prefixClause.getClauses().size() != 2) { 51 | if(prefixClause.getClauses().size() == 1 && (extraOperator == EQLOperator.SENTENCE || extraOperator == EQLOperator.NOT_IN_SENTENCE || 52 | extraOperator == EQLOperator.PARAGRAPH || extraOperator == EQLOperator.NOT_IN_PARAGRAPH)) { 53 | Clause childClause = prefixClause.getClause(0); 54 | 55 | if(!(childClause instanceof PrefixClause) || !EQLOperator.isWordDistanceOperator(((PrefixClause) childClause).getEQLOperator())) { 56 | ErrorMessageNode node = new ErrorMessageNode(); 57 | node.setErrorMessage(operator.toString() + " (" + extraOperator + ") has invalid sub-statement. Only distance operators are permitted in single statements."); 58 | 59 | invalidNodes.add(node); 60 | operator.setValid(false); 61 | } 62 | } 63 | else { 64 | ErrorMessageNode node = new ErrorMessageNode(); 65 | node.setErrorMessage(operator.toString() + " (" + extraOperator + ") has invalid number of statement. 
Only 2 statements are permitted."); 66 | 67 | invalidNodes.add(node); 68 | operator.setValid(false); 69 | } 70 | } 71 | 72 | int stemmedClauses = 0; 73 | Set searchClauses = TreeUtils.getSearchClauses(prefixClause); 74 | for(SearchClause sc : searchClauses) { 75 | if(sc.getRelation() != null && sc.getRelation().hasModifier("stemming")) { 76 | stemmedClauses++; 77 | } 78 | } 79 | 80 | if(stemmedClauses > 0 && searchClauses.size() != stemmedClauses) { 81 | ErrorMessageNode node = new ErrorMessageNode(); 82 | node.setErrorMessage(operator.toString() + " (" + extraOperator + ") children mixes stemming and non-stemming."); 83 | 84 | invalidNodes.add(node); 85 | operator.setValid(false); 86 | } 87 | 88 | Set indices = TreeUtils.getIndices(prefixClause); 89 | Set searchTermClauses = TreeUtils.getSearchTermClauses((prefixClause)); 90 | 91 | if(indices.size() > 1) { 92 | ErrorMessageNode node = new ErrorMessageNode(); 93 | node.setErrorMessage(operator.toString() + " (" + extraOperator 94 | + ") has invalid number of indices: " + indices 95 | + ". 
Only 1 or no index is permitted."); 96 | 97 | invalidNodes.add(node); 98 | operator.setValid(false); 99 | } 100 | else if(!indices.isEmpty() && !searchTermClauses.isEmpty()) { 101 | ErrorMessageNode node = new ErrorMessageNode(); 102 | node.setErrorMessage(operator.toString() + " (" + extraOperator + ") cannot mix index and non-index statements"); 103 | 104 | invalidNodes.add(node); 105 | operator.setValid(false); 106 | } 107 | else { 108 | if(schema != null) { 109 | for(String index : indices) { 110 | 111 | Field field = schema.getField(index); 112 | if(field == null && index.contains("text_content")) { 113 | continue; 114 | } 115 | 116 | if(!field.hasSentences && (extraOperator == EQLOperator.SENTENCE || extraOperator == EQLOperator.NOT_IN_SENTENCE)) { 117 | ErrorMessageNode node = new ErrorMessageNode(); 118 | node.setErrorMessage(operator.toString() + " (" + extraOperator + ") cannot be applied on a field (" + index + ") without sentences"); 119 | 120 | invalidNodes.add(node); 121 | operator.setValid(false); 122 | } 123 | 124 | if(!field.hasParagraphs && (extraOperator == EQLOperator.PARAGRAPH || extraOperator == EQLOperator.NOT_IN_PARAGRAPH)) { 125 | ErrorMessageNode node = new ErrorMessageNode(); 126 | node.setErrorMessage(operator.toString() + " (" + extraOperator + ") cannot be applied on a field (" + index + ") without paragraphs"); 127 | 128 | invalidNodes.add(node); 129 | operator.setValid(false); 130 | } 131 | } 132 | } 133 | } 134 | } 135 | 136 | if(extraOperator == EQLOperator.MAXIMUM_OCCURRENCE || extraOperator == EQLOperator.MINIMUM_OCCURRENCE) { 137 | int clauses = 0; 138 | for(Clause clause : prefixClause.getClauses()) { 139 | if(!(clause instanceof CommentClause)) { 140 | clauses++; 141 | } 142 | } 143 | 144 | if(clauses != prefixClause.getSearchClause().size()) { 145 | ErrorMessageNode node = new ErrorMessageNode(); 146 | node.setErrorMessage(operator.toString() + " can be applied only to search clauses."); 147 | 148 | invalidNodes.add(node); 
149 | operator.setValid(false); 150 | } 151 | } 152 | 153 | if(extraOperator == null) { 154 | ErrorMessageNode node = new ErrorMessageNode(); 155 | node.setErrorMessage(operator.toString() + " is not a valid EXTRA operator"); 156 | 157 | invalidNodes.add(node); 158 | operator.setValid(false); 159 | } 160 | 161 | invalidNodes.addAll(visitChildren(prefixClause)); 162 | 163 | return invalidNodes; 164 | 165 | } 166 | 167 | public List visitSearchClause(SearchClause searchClause) { 168 | 169 | List invalidRelations = new ArrayList(); 170 | 171 | Index index = searchClause.getIndex(); 172 | Relation relation = searchClause.getRelation(); 173 | if(relation == null || index == null) { 174 | return invalidRelations; 175 | } 176 | 177 | if(relation != null && !relation.isValid()) { 178 | ErrorMessageNode node = new ErrorMessageNode(); 179 | node.setErrorMessage(relation.toString() + " is not a valid EXTRA relation"); 180 | 181 | invalidRelations.add(node); 182 | } 183 | 184 | SearchTerm searchTerm = searchClause.getSearchTerm(); 185 | if(searchTerm.isRegexp()) { 186 | 187 | if(relation.hasModifier("stemming")) { 188 | ErrorMessageNode node = new ErrorMessageNode(); 189 | node.setErrorMessage(relation.toString() + ". 
Stemming cannot be mixed with regex: " + searchTerm); 190 | 191 | invalidRelations.add(node); 192 | relation.setValid(false); 193 | } 194 | 195 | if(relation.is(">") || relation.is(">=") || relation.is("<") || relation.is("<=") || relation.is("within") || relation.is(">")) { 196 | ErrorMessageNode node = new ErrorMessageNode(); 197 | node.setErrorMessage(relation.getRelation() + " relation cannot be mixed with regex: " + searchTerm); 198 | 199 | invalidRelations.add(node); 200 | relation.setValid(false); 201 | } 202 | 203 | } 204 | 205 | if(relation.hasModifier("regexp") && !searchTerm.isRegexp()) { 206 | ErrorMessageNode node = new ErrorMessageNode(); 207 | node.setErrorMessage(relation + " has regexp modifier but no regexp has been detected in search term: " + searchTerm); 208 | 209 | invalidRelations.add(node); 210 | relation.setValid(false); 211 | } 212 | 213 | if(relation.hasModifier("masked") && !searchTerm.hasWildCards()) { 214 | ErrorMessageNode node = new ErrorMessageNode(); 215 | node.setErrorMessage(relation + " has masked modifier but no wildcards have been detected in search term: " + searchTerm); 216 | 217 | invalidRelations.add(node); 218 | relation.setValid(false); 219 | } 220 | 221 | if(relation.hasModifier("stemming") && (relation.is(">") || relation.is(">=") || relation.is("<") || relation.is("<=") || relation.is("within") || relation.is(">"))) { 222 | ErrorMessageNode node = new ErrorMessageNode(); 223 | node.setErrorMessage(relation.getRelation() + " relation cannot has stemming modifier."); 224 | 225 | invalidRelations.add(node); 226 | relation.setValid(false); 227 | } 228 | else if(schema != null && relation.hasModifier("stemming")) { 229 | Field field = schema.getField(index.getName()); 230 | if(field != null && !field.textual) { 231 | ErrorMessageNode node = new ErrorMessageNode(); 232 | node.setErrorMessage("Stemming modifier cannot be applied on a non-textual index: " + index.getName()); 233 | 234 | invalidRelations.add(node); 235 | 
relation.setValid(false); 236 | } 237 | } 238 | 239 | return invalidRelations; 240 | } 241 | 242 | protected List aggregateResult(List aggregate, List nextResult) { 243 | aggregate.addAll(nextResult); 244 | return aggregate; 245 | } 246 | 247 | protected List defaultResult() { 248 | return new ArrayList(); 249 | } 250 | 251 | } 252 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/visitor/ReferenceClausesVisitor.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.visitor; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashSet; 5 | import java.util.List; 6 | import java.util.Set; 7 | 8 | import org.iptc.extra.core.daos.RulesDAO; 9 | import org.iptc.extra.core.eql.EQLParser; 10 | import org.iptc.extra.core.eql.tree.SyntaxTree; 11 | import org.iptc.extra.core.eql.tree.nodes.ErrorMessageNode; 12 | import org.iptc.extra.core.eql.tree.nodes.ReferenceClause; 13 | import org.iptc.extra.core.types.Rule; 14 | 15 | public class ReferenceClausesVisitor extends SyntaxTreeVisitor> { 16 | 17 | private RulesDAO dao; 18 | 19 | private Set ruleIds = new HashSet(); 20 | 21 | public ReferenceClausesVisitor(RulesDAO dao, String rootRuleId) { 22 | this.dao = dao; 23 | ruleIds.add(rootRuleId); 24 | } 25 | 26 | @Override 27 | public List visitReferenceClause(ReferenceClause referenceClause) { 28 | 29 | List errors = new ArrayList(); 30 | 31 | String ruleId = referenceClause.getRuleId(); 32 | if(ruleIds.contains(ruleId)) { 33 | 34 | referenceClause.setValid(false); 35 | 36 | ErrorMessageNode errorNode = new ErrorMessageNode(); 37 | errorNode.setErrorMessage("Cyclic reference: " + ruleIds + " - " + ruleId); 38 | 39 | errors.add(errorNode); 40 | } 41 | else { 42 | ruleIds.add(ruleId); 43 | Rule rule = dao.get(ruleId); 44 | if(rule != null) { 45 | referenceClause.setRule(rule); 46 | 47 | String referencedEql = rule.getQuery(); 48 | 
SyntaxTree referencedSyntaxTree = EQLParser.parse(referencedEql); 49 | referenceClause.setRuleSyntaxTree(referencedSyntaxTree); 50 | 51 | if(!referencedSyntaxTree.hasErrors() && referencedSyntaxTree.getRootNode() != null) { 52 | visit(referencedSyntaxTree.getRootNode()); 53 | } 54 | else { 55 | referenceClause.setValid(false); 56 | 57 | ErrorMessageNode errorNode = new ErrorMessageNode(); 58 | errorNode.setErrorMessage("Referenced rule " + ruleId + " has invalid syntax."); 59 | 60 | errors.add(errorNode); 61 | } 62 | } 63 | else { 64 | referenceClause.setValid(false); 65 | 66 | ErrorMessageNode errorNode = new ErrorMessageNode(); 67 | errorNode.setErrorMessage("Referenced rule " + ruleId + " does not exist."); 68 | 69 | errors.add(errorNode); 70 | } 71 | } 72 | 73 | return errors; 74 | } 75 | 76 | protected List aggregateResult(List aggregate, List nextResult) { 77 | aggregate.addAll(nextResult); 78 | return aggregate; 79 | } 80 | 81 | protected List defaultResult() { 82 | return new ArrayList(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/eql/tree/visitor/SyntaxTreeVisitor.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.eql.tree.visitor; 2 | 3 | import org.iptc.extra.core.eql.tree.nodes.Clause; 4 | import org.iptc.extra.core.eql.tree.nodes.CommentClause; 5 | import org.iptc.extra.core.eql.tree.nodes.Index; 6 | import org.iptc.extra.core.eql.tree.nodes.Node; 7 | import org.iptc.extra.core.eql.tree.nodes.Operator; 8 | import org.iptc.extra.core.eql.tree.nodes.PrefixClause; 9 | import org.iptc.extra.core.eql.tree.nodes.ReferenceClause; 10 | import org.iptc.extra.core.eql.tree.nodes.Relation; 11 | import org.iptc.extra.core.eql.tree.nodes.SearchClause; 12 | import org.iptc.extra.core.eql.tree.nodes.SearchTerm; 13 | 14 | /** 15 | * @author manosetro - Manos Schinas 16 | * 17 | * This class traverses the given 
syntax tree in a depth first fashion without performing any action. 18 | * That class should be extended by every visitor class. 19 | * 20 | */ 21 | public class SyntaxTreeVisitor { 22 | 23 | public T visit(Node node) { 24 | 25 | if (node instanceof Index) { 26 | return visitIndex((Index) node); 27 | } 28 | 29 | if (node instanceof Relation) { 30 | return visitRelation((Relation) node); 31 | } 32 | 33 | if (node instanceof Operator) { 34 | return visitOperator((Operator) node); 35 | } 36 | 37 | if (node instanceof SearchTerm) { 38 | return visitSearchTerm((SearchTerm) node); 39 | } 40 | 41 | if (node instanceof CommentClause) { 42 | return visitCommentClause((CommentClause) node); 43 | } 44 | 45 | if (node instanceof SearchClause) { 46 | return visitSearchClause((SearchClause) node); 47 | } 48 | 49 | if (node instanceof ReferenceClause) { 50 | return visitReferenceClause((ReferenceClause) node); 51 | } 52 | 53 | if (node instanceof PrefixClause) { 54 | return visitPrefixClause((PrefixClause) node); 55 | } 56 | 57 | return null; 58 | } 59 | 60 | public T visitChildren(Node node) { 61 | T result = defaultResult(); 62 | if(node.hasChildren()) { 63 | int n = node.getChildCount(); 64 | for (int i=0; i findDocuments(String indexName, int page, int nPerPage) throws IOException { 123 | return findDocuments(null, indexName, page, nPerPage, null, null); 124 | } 125 | 126 | public ElasticSearchResponse findDocuments(QueryBuilder qb, String indexName, int page, int nPerPage) throws IOException { 127 | return findDocuments(qb, indexName, page, nPerPage, null, null); 128 | } 129 | 130 | public ElasticSearchResponse findDocuments(QueryBuilder qb, String indexName, int page, int nPerPage, Schema schema) throws IOException { 131 | return findDocuments(qb, indexName, page, nPerPage, schema, null); 132 | } 133 | 134 | public ElasticSearchResponse findDocuments(QueryBuilder qb, String indexName, int page, int nPerPage, QueryBuilder highlightQuery) throws IOException { 135 | return 
findDocuments(qb, indexName, page, nPerPage, null, highlightQuery); 136 | } 137 | 138 | /** 139 | * 140 | * @param qb The query expressed in Elastic search DSL 141 | * @param indexName The name of the index in elastic search 142 | * @param page Page number 143 | * @param nPerPage Number of documents per page 144 | * @param schema The schema of the documents in the index 145 | * @param highlightQuery The query used to highlight the results 146 | * 147 | * @return ElasticSearchResponse A set of documents that match the query 148 | * 149 | */ 150 | public ElasticSearchResponse findDocuments(QueryBuilder qb, String indexName, int page, int nPerPage, Schema schema, QueryBuilder highlightQuery) throws IOException { 151 | 152 | Integer from = (page - 1) * nPerPage; 153 | Integer size = nPerPage; 154 | 155 | if(qb == null) { 156 | qb = matchAllQuery(); 157 | } 158 | 159 | SearchRequestBuilder request = client.prepareSearch(indexName) 160 | .setTypes("documents") 161 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 162 | .setQuery(qb) 163 | .setFrom(from) 164 | .setSize(size) 165 | .setExplain(false); 166 | 167 | if(schema != null) { 168 | HighlightBuilder hlBuilder = new HighlightBuilder(); 169 | for(String field : schema.getTextualFieldNames()) { 170 | hlBuilder.field(field).fragmentSize(0).numOfFragments(0); 171 | hlBuilder.field("stemmed_" + field).fragmentSize(0).numOfFragments(0); 172 | hlBuilder.field("case_sensitive_" + field).fragmentSize(0).numOfFragments(0); 173 | hlBuilder.field("literal_" + field).fragmentSize(0).numOfFragments(0); 174 | } 175 | 176 | hlBuilder.preTags(""); 177 | hlBuilder.postTags(""); 178 | if(highlightQuery != null) { 179 | hlBuilder.highlightQuery(highlightQuery); 180 | } 181 | 182 | request.highlighter(hlBuilder); 183 | } 184 | 185 | SearchResponse response = request.get(); 186 | 187 | List documents = new ArrayList(); 188 | SearchHits hits = response.getHits(); 189 | float maxScore = hits.getMaxScore() > 0 ? 
hits.getMaxScore() : 1; 190 | for(SearchHit hit : hits) { 191 | float hitScore = hit.getScore(); 192 | String source = hit.sourceAsString(); 193 | Map highlights = hit.getHighlightFields(); 194 | Document doc = ElasticSearchUtils.sourceToDocument(source, highlights, schema); 195 | if(doc != null) { 196 | String score = Float.toString(hitScore/maxScore); 197 | doc.addField("score", score); 198 | documents.add(doc); 199 | } 200 | } 201 | 202 | ElasticSearchResponse resp = new ElasticSearchResponse(); 203 | resp.setResults(documents); 204 | resp.setFound(hits.getTotalHits()); 205 | 206 | return resp; 207 | } 208 | 209 | /** 210 | * Count number of documents that match the query 211 | * 212 | * @param qb The query expressed in Elastic search DSL 213 | * @param indexName The name of the index in elastic search 214 | * 215 | * @return number of matched documents long 216 | */ 217 | public long countDocuments(QueryBuilder qb, String indexName) throws IOException { 218 | SearchRequestBuilder request = client.prepareSearch(indexName) 219 | .setTypes("documents") 220 | .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) 221 | .setQuery(qb) 222 | .setSize(0) 223 | .setExplain(false); 224 | 225 | SearchResponse response = request.get(); 226 | SearchHits hits = response.getHits(); 227 | 228 | return hits.getTotalHits(); 229 | } 230 | 231 | public int submitRule(String id, QueryBuilder qb, String indexName) throws IOException { 232 | return submitRule(id, qb, indexName, null); 233 | } 234 | 235 | /** 236 | * Submit a rule into percolate index 237 | * 238 | * @param id - The id of the rule 239 | * @param qb - The Elastic Search query generated by the rule 240 | * @param indexName - The index name of the percolate index 241 | * @param groupId - A group id if that rule is part of a subset 242 | * 243 | * @return A code that indicates whether the submission was successful or not 244 | */ 245 | public int submitRule(String id, QueryBuilder qb, String indexName, String groupId) throws 
IOException { 246 | GetResponse response = client.prepareGet(indexName, "queries", id).setRefresh(true).execute().actionGet(); 247 | if(response.isExists()) { 248 | 249 | UpdateResponse updateResponse = client.prepareUpdate(indexName, "queries", id) 250 | .setDoc(XContentFactory.jsonBuilder() 251 | .startObject() 252 | .field("query", qb) 253 | .endObject()) 254 | .get(); 255 | 256 | if(groupId != null && !groupId.equals("")) { 257 | Map source = response.getSourceAsMap(); 258 | List groups = (List) source.get("group"); 259 | if(groups == null || !groups.contains(groupId)) { 260 | Script script = new Script("if(!ctx._source.containsKey(\"group\")) { ctx._source.group = []; } ctx._source.group.add(\"" + groupId + "\");"); 261 | updateResponse = client.prepareUpdate(indexName, "queries", id).setScript(script).get(); 262 | } 263 | } 264 | 265 | return updateResponse.status().getStatus(); 266 | } 267 | else { 268 | XContentBuilder query = XContentFactory.jsonBuilder().startObject().field("query", qb); 269 | if(groupId != null && !groupId.equals("")) { 270 | query.array("group", groupId); 271 | } 272 | query.endObject(); 273 | 274 | IndexResponse indexResponse = client.prepareIndex(indexName, "queries", id) 275 | .setSource(query) 276 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 277 | .get(); 278 | 279 | return indexResponse.status().getStatus(); 280 | } 281 | 282 | 283 | } 284 | 285 | /** 286 | * Delete the rule with the given id from the percolate index 287 | */ 288 | public int deleteRule(String id, String indexName) throws IOException { 289 | DeleteResponse deleteResponse = client.prepareDelete(indexName, "queries", id).get(); 290 | return deleteResponse.status().getStatus(); 291 | 292 | } 293 | 294 | /** 295 | * Given a document, retrieve rules indexed into percolate index 296 | * 297 | * @param document The document used to query percolate index 298 | * @param indexName The name of the percolate index 299 | * @param page Page number 300 | * @param nPerPage Number of rules per page
301 | * 302 | * @return A set of rule ids that match the input document 303 | */ 304 | public ElasticSearchResponse findRules(Document document, String indexName, int page, int nPerPage) throws IOException { 305 | return findRules(document, indexName, null, page, nPerPage); 306 | } 307 | 308 | /** 309 | * Given a document, retrieve rules indexed into percolate index 310 | * 311 | * @param document The document used to query percolate index 312 | * @param indexName The name of the percolate index 313 | * @param group The id of a group, used to return only the rules of that group that match the document 314 | * @param page Page number 315 | * @param nPerPage Number of rules per page 316 | * 317 | * @return A set of rule ids that match the input document 318 | */ 319 | 320 | public ElasticSearchResponse findRules(Document document, String indexName, String group, int page, int nPerPage) throws IOException { 321 | 322 | XContentBuilder docBuilder = ElasticSearchUtils.buildPercolateQuery(document); 323 | 324 | QueryBuilder query; 325 | if(group != null && !group.equals("")) { 326 | query = boolQuery() 327 | .must(new PercolateQueryBuilder("query", "doc", docBuilder.bytes())) 328 | .must(termQuery("group", group)); 329 | } 330 | else { 331 | query = new PercolateQueryBuilder("query", "doc", docBuilder.bytes()); 332 | } 333 | 334 | Integer from = (page - 1) * nPerPage; 335 | Integer size = nPerPage; 336 | 337 | ElasticSearchResponse resp = new ElasticSearchResponse(); 338 | try { 339 | SearchResponse response = client.prepareSearch(indexName) 340 | .setQuery(query) 341 | .setFrom(from) 342 | .setSize(size) 343 | .get(); 344 | 345 | List ruleIds = new ArrayList(); 346 | for(SearchHit hit : response.getHits()) { 347 | ruleIds.add(hit.getId()); 348 | } 349 | resp.setResults(ruleIds); 350 | resp.setFound(response.getHits().getTotalHits()); 351 | } 352 | catch(Exception e) { 353 | 354 | } 355 | 356 | return resp; 357 | } 358 | 359 | public boolean createPercolateIndex(Schema
schema) throws IOException { 360 | 361 | IndicesAdminClient indicesClient = client.admin().indices(); 362 | boolean exists = indicesClient.prepareExists(schema.getId()).execute().actionGet().isExists(); 363 | if(!exists) { 364 | XContentBuilder settingBuilder = ElasticSearchUtils.buildPercolateIndexSettings(schema.getLanguage()); 365 | indicesClient.prepareCreate(schema.getId()) 366 | .setSource(settingBuilder) 367 | .get(); 368 | 369 | XContentBuilder mappingBuilder = XContentFactory.jsonBuilder() 370 | .startObject() 371 | .startObject("properties") 372 | .startObject("query").field("type", "percolator").endObject() 373 | .startObject("group").field("type", "keyword").endObject() 374 | .endObject() 375 | .endObject(); 376 | 377 | PutMappingResponse mappingResponse = indicesClient.preparePutMapping(schema.getId()) 378 | .setType("queries") 379 | .setSource(mappingBuilder) 380 | .get(); 381 | 382 | return mappingResponse.isAcknowledged(); 383 | } 384 | 385 | return false; 386 | } 387 | 388 | public boolean createPercolateIndexMapping(Schema schema) throws IOException { 389 | 390 | //Create Percolate Index, if not exist 391 | createPercolateIndex(schema); 392 | 393 | //Create Document Mapping based on the specified schema 394 | XContentBuilder mappingBuilder = ElasticSearchUtils.buildDocumentMapping(schema, false); 395 | PutMappingResponse mappingResponse = client.admin().indices().preparePutMapping(schema.getId()) 396 | .setType("doc") 397 | .setSource(mappingBuilder) 398 | .get(); 399 | 400 | return mappingResponse.isAcknowledged(); 401 | } 402 | 403 | } 404 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/es/ElasticSearchResponse.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.es; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | public class ElasticSearchResponse { 7 | 8 | private long found = 0; 9 | 10 
| private List results = new ArrayList(); 11 | 12 | public long getFound() { 13 | return found; 14 | } 15 | 16 | public void setFound(long found) { 17 | this.found = found; 18 | } 19 | 20 | public List getResults() { 21 | return results; 22 | } 23 | 24 | public void setResults(List results) { 25 | this.results = results; 26 | } 27 | 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/es/ElasticSearchUtils.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.es; 2 | 3 | import java.io.IOException; 4 | import java.io.Reader; 5 | import java.io.StringReader; 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.Map; 9 | import java.util.Map.Entry; 10 | 11 | import org.elasticsearch.common.text.Text; 12 | import org.elasticsearch.common.xcontent.XContentBuilder; 13 | import org.elasticsearch.common.xcontent.XContentFactory; 14 | import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; 15 | import org.iptc.extra.core.types.Schema; 16 | import org.iptc.extra.core.types.Schema.Field; 17 | import org.iptc.extra.core.types.document.Document; 18 | import org.iptc.extra.core.types.document.DocumentField; 19 | import org.iptc.extra.core.types.document.DocumentTopic; 20 | import org.iptc.extra.core.types.document.Paragraph; 21 | import org.iptc.extra.core.types.document.Sentence; 22 | import org.iptc.extra.core.types.document.StructuredTextField; 23 | import org.iptc.extra.core.types.document.TextField; 24 | import org.iptc.extra.core.utils.TextUtils; 25 | 26 | import com.google.gson.Gson; 27 | import com.google.gson.JsonElement; 28 | import com.google.gson.JsonObject; 29 | import com.google.gson.JsonPrimitive; 30 | 31 | public class ElasticSearchUtils { 32 | 33 | public static XContentBuilder buildPercolateIndexSettings(String lang) throws IOException { 34 | 35 | XContentBuilder 
settingBuilder = XContentFactory.jsonBuilder().startObject(); 36 | 37 | settingBuilder.startObject("settings").startObject("analysis"); 38 | settingBuilder.startObject("filter"); 39 | addIndexFilters(settingBuilder, lang); 40 | settingBuilder.endObject().startObject("analyzer"); 41 | addIndexAnalyzers(settingBuilder, lang); 42 | settingBuilder.endObject(); 43 | settingBuilder.endObject().endObject().endObject(); 44 | 45 | return settingBuilder; 46 | } 47 | 48 | public static XContentBuilder buildCorporaIndexSettings(Schema schema) throws IOException { 49 | 50 | String lang = schema.getLanguage(); 51 | XContentBuilder settingBuilder = XContentFactory.jsonBuilder().startObject(); 52 | 53 | settingBuilder.startObject("settings") 54 | .startObject("analysis") 55 | .startObject("filter"); 56 | addIndexFilters(settingBuilder, lang); 57 | settingBuilder.endObject() 58 | .startObject("analyzer"); 59 | addIndexAnalyzers(settingBuilder, lang); 60 | settingBuilder.endObject() 61 | .endObject() 62 | .endObject(); 63 | 64 | XContentBuilder mappingsPropertiesBuilder = buildDocumentMapping(schema, true); 65 | 66 | settingBuilder.startObject("mappings") 67 | .rawField("documents", mappingsPropertiesBuilder.bytes()) 68 | .endObject().endObject(); 69 | 70 | return settingBuilder; 71 | } 72 | 73 | private static void addIndexFilters(XContentBuilder mappingBuilder, String lang) throws IOException { 74 | 75 | mappingBuilder.startObject(lang + "_stop"); 76 | mappingBuilder.field("type", "stop"); 77 | mappingBuilder.field("stopwords", "_" + lang + "_"); 78 | mappingBuilder.endObject(); 79 | 80 | if(lang.equals("english")) { 81 | mappingBuilder.startObject("english_possessive_stemmer"); 82 | mappingBuilder.field("type", "stemmer"); 83 | mappingBuilder.field("language", "possessive_english"); 84 | mappingBuilder.endObject(); 85 | 86 | mappingBuilder.startObject("english_stemmer"); 87 | mappingBuilder.field("type", "stemmer"); 88 | mappingBuilder.field("language", "english"); 89 | 
mappingBuilder.endObject(); 90 | } 91 | 92 | if(lang.equals("german")) { 93 | mappingBuilder.startObject("german_stemmer"); 94 | mappingBuilder.field("type", "stemmer"); 95 | mappingBuilder.field("language", "light_german"); 96 | mappingBuilder.endObject(); 97 | } 98 | } 99 | 100 | 101 | private static void addIndexAnalyzers(XContentBuilder mappingBuilder, String lang) throws IOException { 102 | 103 | if(lang.equals("english")) { 104 | mappingBuilder.startObject("english_stemming_analyzer"); 105 | mappingBuilder.field("tokenizer", "standard"); 106 | mappingBuilder.startArray("filter").value("english_possessive_stemmer").value("lowercase").value("english_stop").value("english_stemmer"); 107 | mappingBuilder.endArray().endObject(); 108 | 109 | mappingBuilder.startObject("english_non_stemming_analyzer"); 110 | mappingBuilder.field("tokenizer", "standard"); 111 | mappingBuilder.startArray("filter").value("lowercase").value("english_stop"); 112 | mappingBuilder.endArray().endObject(); 113 | 114 | mappingBuilder.startObject("english_case_sensitive_analyzer"); 115 | mappingBuilder.field("tokenizer", "standard"); 116 | mappingBuilder.startArray("filter").value("english_stop"); 117 | mappingBuilder.endArray().endObject(); 118 | 119 | mappingBuilder.startObject("english_literal_analyzer"); 120 | mappingBuilder.field("tokenizer", "standard"); 121 | mappingBuilder.startArray("filter").value("lowercase").value("english_stop"); 122 | mappingBuilder.endArray().endObject(); 123 | } 124 | 125 | if(lang.equals("german")) { 126 | mappingBuilder.startObject("german_stemming_analyzer"); 127 | mappingBuilder.field("tokenizer", "standard"); 128 | mappingBuilder.startArray("filter").value("lowercase").value("german_stop").value("german_normalization").value("german_stemmer"); 129 | mappingBuilder.endArray().endObject(); 130 | 131 | mappingBuilder.startObject("german_non_stemming_analyzer"); 132 | mappingBuilder.field("tokenizer", "standard"); 133 | 
mappingBuilder.startArray("filter").value("lowercase").value("german_stop").value("german_normalization"); 134 | mappingBuilder.endArray().endObject(); 135 | 136 | mappingBuilder.startObject("german_case_sensitive_analyzer"); 137 | mappingBuilder.field("tokenizer", "standard"); 138 | mappingBuilder.startArray("filter").value("german_stop").value("german_normalization"); 139 | mappingBuilder.endArray().endObject(); 140 | 141 | mappingBuilder.startObject("german_literal_analyzer"); 142 | mappingBuilder.field("tokenizer", "whitespace"); 143 | mappingBuilder.startArray("filter").value("lowercase").value("german_stop").value("german_normalization"); 144 | mappingBuilder.endArray().endObject(); 145 | } 146 | 147 | } 148 | 149 | public static XContentBuilder buildPercolateQuery(Document document) throws IOException { 150 | XContentBuilder docBuilder = XContentFactory.jsonBuilder().startObject(); 151 | for(String fieldName : document.keySet()) { 152 | DocumentField field = document.get(fieldName); 153 | if(field instanceof StructuredTextField) { 154 | StructuredTextField structuredField = (StructuredTextField) field; 155 | docBuilder.field(fieldName, structuredField.getValue()); 156 | 157 | docBuilder.startArray(fieldName + "_paragraphs"); 158 | for(Paragraph paragraph : structuredField.getParagraphs()) { 159 | docBuilder.startObject() 160 | .field("paragraph", paragraph.getParagraph()) 161 | .endObject(); 162 | } 163 | docBuilder.endArray(); 164 | 165 | docBuilder.startArray(fieldName + "_sentences"); 166 | for(Sentence sentence : structuredField.getSentences()) { 167 | docBuilder.startObject() 168 | .field("sentence", sentence.getText()) 169 | .endObject(); 170 | } 171 | docBuilder.endArray(); 172 | 173 | } 174 | else if(field instanceof TextField) { 175 | docBuilder.field(fieldName, ((TextField) field).getValue()); 176 | } 177 | } 178 | docBuilder.endObject(); 179 | 180 | return docBuilder; 181 | } 182 | 183 | public static XContentBuilder buildDocumentMapping(Schema 
schema, boolean addTopicsMapping) throws IOException { 184 | 185 | String lang = schema.getLanguage(); 186 | 187 | XContentBuilder mappingBuilder = XContentFactory.jsonBuilder().startObject(); 188 | 189 | mappingBuilder.startObject("_all").field("enabled", "false").endObject(); 190 | 191 | mappingBuilder.startObject("properties"); 192 | for(String fieldName : schema.getFieldNames()) { 193 | 194 | Field field = schema.getField(fieldName); 195 | if(field.textual) { 196 | addFieldMapping(mappingBuilder, fieldName, lang + "_non_stemming_analyzer"); 197 | addFieldMapping(mappingBuilder, "literal_" + fieldName, lang + "_literal_analyzer"); 198 | addFieldMapping(mappingBuilder, "stemmed_" + fieldName, lang + "_stemming_analyzer"); 199 | addFieldMapping(mappingBuilder, "case_sensitive_" + fieldName, lang + "_case_sensitive_analyzer"); 200 | addKeywordFieldMapping(mappingBuilder, "raw_" + fieldName); 201 | addKeywordFieldMapping(mappingBuilder, fieldName + "_tokens"); 202 | addKeywordFieldMapping(mappingBuilder, "stemmed_" + fieldName + "_tokens"); 203 | 204 | if(field.hasSentences) { 205 | addNestedFieldMapping(mappingBuilder, fieldName + "_sentences", "sentence", lang + "_non_stemming_analyzer"); 206 | addNestedFieldMapping(mappingBuilder, "literal_" + fieldName + "_sentences", "sentence", lang + "_literal_analyzer"); 207 | addNestedFieldMapping(mappingBuilder, "stemmed_" + fieldName + "_sentences", "sentence", lang + "_stemming_analyzer"); 208 | addNestedFieldMapping(mappingBuilder, "case_sensitive_" + fieldName + "_sentences", "sentence", lang + "_case_sensitive_analyzer"); 209 | } 210 | 211 | if(field.hasParagraphs) { 212 | addNestedFieldMapping(mappingBuilder, fieldName + "_paragraphs", "paragraph", lang + "_non_stemming_analyzer"); 213 | addNestedFieldMapping(mappingBuilder, "literal_" + fieldName + "_paragraphs", "paragraph", lang + "_literal_analyzer"); 214 | addNestedFieldMapping(mappingBuilder, "stemmed_" + fieldName + "_paragraphs", "paragraph", lang + 
"_stemming_analyzer"); 215 | addNestedFieldMapping(mappingBuilder, "case_sensitive_" + fieldName + "_paragraphs", "paragraph", lang + "_case_sensitive_analyzer"); 216 | } 217 | } 218 | else { 219 | String type = "keyword"; 220 | if(field.numeric) { 221 | type = "long"; 222 | } 223 | else if(field.date) { 224 | type = "date"; 225 | } 226 | 227 | mappingBuilder.startObject(fieldName); 228 | mappingBuilder.field("type", type); 229 | mappingBuilder.endObject(); 230 | } 231 | 232 | } 233 | 234 | if(addTopicsMapping) { 235 | addKeywordFieldMapping(mappingBuilder, "excluded"); 236 | String[] topicFields = {"topicId", "name", "exclude", "parentTopic", "url", "association"}; 237 | addNestedKeywordsMapping(mappingBuilder, "topics", topicFields); 238 | } 239 | 240 | //text_content field 241 | addFieldMapping(mappingBuilder, "text_content", lang + "_non_stemming_analyzer"); 242 | addFieldMapping(mappingBuilder, "literal_text_content", lang + "_literal_analyzer"); 243 | addFieldMapping(mappingBuilder, "stemmed_text_content", lang + "_stemming_analyzer"); 244 | addFieldMapping(mappingBuilder, "case_sensitive_text_content", lang + "_case_sensitive_analyzer"); 245 | addKeywordFieldMapping(mappingBuilder, "text_content_tokens"); 246 | addKeywordFieldMapping(mappingBuilder, "stemmed_text_content_tokens"); 247 | // sentences 248 | addNestedFieldMapping(mappingBuilder, "text_content_sentences", "sentence", lang + "_non_stemming_analyzer"); 249 | addNestedFieldMapping(mappingBuilder, "literal_text_content_sentences", "sentence", lang + "_literal_analyzer"); 250 | addNestedFieldMapping(mappingBuilder, "stemmed_text_content_sentences", "sentence", lang + "_stemming_analyzer"); 251 | addNestedFieldMapping(mappingBuilder, "case_sensitive_text_content_sentences", "sentence", lang + "_case_sensitive_analyzer"); 252 | // paragraphs 253 | addNestedFieldMapping(mappingBuilder, "text_content_paragraphs", "paragraph", lang + "_non_stemming_analyzer"); 254 | addNestedFieldMapping(mappingBuilder, 
"literal_text_content_paragraphs", "paragraph", lang + "_literal_analyzer"); 255 | addNestedFieldMapping(mappingBuilder, "stemmed_text_content_paragraphs", "paragraph", lang + "_stemming_analyzer"); 256 | addNestedFieldMapping(mappingBuilder, "case_sensitive_text_content_paragraphs", "paragraph", lang + "_case_sensitive_analyzer"); 257 | 258 | mappingBuilder.endObject(); 259 | mappingBuilder.endObject(); 260 | 261 | return mappingBuilder; 262 | } 263 | 264 | 265 | private static void addKeywordFieldMapping(XContentBuilder mappingBuilder, String fieldName) throws IOException { 266 | mappingBuilder.startObject(fieldName); 267 | mappingBuilder.field("type", "keyword"); 268 | mappingBuilder.endObject(); 269 | } 270 | 271 | private static void addFieldMapping(XContentBuilder mappingBuilder, String fieldName, String analyzer) throws IOException { 272 | mappingBuilder.startObject(fieldName); 273 | mappingBuilder.field("type", "text"); 274 | mappingBuilder.field("analyzer", analyzer); 275 | mappingBuilder.endObject(); 276 | } 277 | 278 | private static void addNestedFieldMapping(XContentBuilder mappingBuilder, String fieldName, String subFieldName, String analyzer) throws IOException { 279 | mappingBuilder.startObject(fieldName); 280 | mappingBuilder.field("type", "nested"); 281 | mappingBuilder.startObject("properties"); 282 | mappingBuilder.startObject(subFieldName); 283 | mappingBuilder.field("type", "text"); 284 | mappingBuilder.field("analyzer", analyzer); 285 | mappingBuilder.endObject(); 286 | mappingBuilder.endObject(); 287 | mappingBuilder.endObject(); 288 | } 289 | 290 | private static void addNestedKeywordsMapping(XContentBuilder mappingBuilder, String fieldName, String[] subFields) throws IOException { 291 | mappingBuilder.startObject(fieldName); 292 | mappingBuilder.field("type", "nested"); 293 | mappingBuilder.startObject("properties"); 294 | for(String subFieldName : subFields) { 295 | mappingBuilder.startObject(subFieldName); 296 | 
mappingBuilder.field("type", "keyword"); 297 | mappingBuilder.endObject(); 298 | } 299 | mappingBuilder.endObject(); 300 | mappingBuilder.endObject(); 301 | } 302 | 303 | /* 304 | * Converts elastic search response to org.iptc.extra.core.types.document.Document 305 | */ 306 | public static Document sourceToDocument(String source, Map highlights, Schema schema) { 307 | Gson gson = new Gson(); 308 | Reader br = new StringReader(source); 309 | JsonObject sourceJson = gson.fromJson(br, JsonObject.class); 310 | 311 | Document doc = new Document(); 312 | if(schema == null) { 313 | for(Entry entry : sourceJson.entrySet()) { 314 | if(entry.getValue().isJsonPrimitive()) { 315 | doc.addField(entry.getKey(), entry.getValue().getAsString()); 316 | } 317 | } 318 | return doc; 319 | } 320 | 321 | for(String fieldName : schema.getFieldNames()) { 322 | Field schemaField = schema.getField(fieldName); 323 | JsonElement fieldValue = sourceJson.get(fieldName); 324 | if(fieldValue instanceof JsonPrimitive) { 325 | String value = fieldValue.getAsString(); 326 | 327 | if(highlights != null) { 328 | if(highlights.containsKey(fieldName)) { 329 | Text[] fragments = highlights.get(fieldName).fragments(); 330 | if(fragments.length > 0) { 331 | value = fragments[0].string(); 332 | } 333 | } 334 | else if(highlights.containsKey("stemmed_" + fieldName)) { 335 | Text[] fragments = highlights.get("stemmed_" + fieldName).fragments(); 336 | if(fragments.length > 0) { 337 | value = fragments[0].string(); 338 | } 339 | } 340 | else if(highlights.containsKey("case_sensitive_" + fieldName)) { 341 | Text[] fragments = highlights.get("case_sensitive_" + fieldName).fragments(); 342 | if(fragments.length > 0) { 343 | value = fragments[0].string(); 344 | } 345 | } 346 | else if(highlights.containsKey("literal_" + fieldName)) { 347 | Text[] fragments = highlights.get("literal_" + fieldName).fragments(); 348 | if(fragments.length > 0) { 349 | value = fragments[0].string(); 350 | } 351 | } 352 | else 
if(highlights.containsKey("raw_" + fieldName)) { 353 | Text[] fragments = highlights.get("raw_" + fieldName).fragments(); 354 | if(fragments.length > 0) { 355 | value = fragments[0].string(); 356 | } 357 | } 358 | } 359 | 360 | if(schemaField.hasParagraphs) { 361 | StructuredTextField bodyField = new StructuredTextField(); 362 | bodyField.setValue(value); 363 | 364 | JsonElement paragraphsArray = sourceJson.get(fieldName + "_paragraphs"); 365 | if(paragraphsArray != null && paragraphsArray.isJsonArray()) { 366 | for(JsonElement paragraphElement : paragraphsArray.getAsJsonArray()) { 367 | String paragraph = paragraphElement.getAsJsonObject().get("paragraph").getAsString(); 368 | bodyField.addParagraph(paragraph); 369 | } 370 | } 371 | 372 | doc.addField(fieldName, bodyField); 373 | } 374 | else { 375 | doc.addField(fieldName, value); 376 | } 377 | } 378 | } 379 | 380 | return doc; 381 | } 382 | 383 | public static XContentBuilder documentToSource(Document document, Schema schema) throws IOException { 384 | 385 | XContentBuilder docBuilder = XContentFactory.jsonBuilder() 386 | .startObject() 387 | .field("id", document.getId()); 388 | 389 | docBuilder.startArray("topics"); 390 | for(DocumentTopic topic : document.getTopics()) { 391 | docBuilder.startObject() 392 | .field("topicId", topic.getTopicId()) 393 | .field("url", topic.getUrl()) 394 | .field("association", topic.getAssociation()) 395 | .field("parentTopic", topic.getParentTopic()) 396 | .field("name", topic.getName()) 397 | .field("exclude", topic.isExclude()) 398 | .endObject(); 399 | } 400 | docBuilder.endArray(); 401 | 402 | StringBuffer textContentBuffer = new StringBuffer(); 403 | for(String fieldName : document.keySet()) { 404 | Field fieldType = schema.getField(fieldName); 405 | if(fieldType != null) { 406 | DocumentField docField = document.get(fieldName); 407 | if(docField instanceof StructuredTextField) { 408 | //TODO: handle structured text fields 409 | } 410 | else if(docField instanceof 
TextField) { 411 | TextField textField = (TextField) docField; 412 | String fieldValue = textField.getValue(); 413 | 414 | docBuilder.field(fieldName, textField.getValue()); 415 | 416 | if(fieldType.textual) { 417 | textContentBuffer.append(fieldValue + " "); 418 | 419 | docBuilder.field("stemmed_" + fieldName, fieldValue); 420 | docBuilder.field("literal_" + fieldName, fieldValue); 421 | docBuilder.field("case_sensitive_" + fieldName, fieldValue); 422 | docBuilder.field("raw_" + fieldName, textField.getValue()); 423 | } 424 | 425 | if(fieldType.hasParagraphs) { 426 | List paragraphs = TextUtils.getParagraphs(textField.getValue()); 427 | 428 | addParagraphs(docBuilder, fieldName + "_paragraphs", paragraphs); 429 | addParagraphs(docBuilder, "stemmed_" + fieldName + "_paragraphs", paragraphs); 430 | addParagraphs(docBuilder, "literal_" + fieldName + "_paragraphs", paragraphs); 431 | addParagraphs(docBuilder, "case_sensitive_" + fieldName + "_paragraphs", paragraphs); 432 | 433 | List sentences = new ArrayList(); 434 | for(String paragraph : paragraphs) { 435 | sentences.addAll(TextUtils.getSentences(paragraph)); 436 | } 437 | 438 | addSentences(docBuilder, fieldName + "_sentences", sentences); 439 | addSentences(docBuilder, "stemmed_" + fieldName + "_sentences", sentences); 440 | addSentences(docBuilder, "literal_" + fieldName + "_sentences", sentences); 441 | addSentences(docBuilder, "case_sensitive_" + fieldName + "_sentences", sentences); 442 | 443 | } 444 | else { 445 | if(fieldType.hasSentences) { 446 | List sentences = TextUtils.getSentences(textField.getValue()); 447 | 448 | addSentences(docBuilder, fieldName + "_sentences", sentences); 449 | addSentences(docBuilder, "stemmed_" + fieldName + "_sentences", sentences); 450 | addSentences(docBuilder, "literal_" + fieldName + "_sentences", sentences); 451 | addSentences(docBuilder, "case_sensitive_" + fieldName + "_sentences", sentences); 452 | } 453 | } 454 | } 455 | 456 | } 457 | } 458 | 459 | String textContent 
= textContentBuffer.toString(); 460 | if(textContent.length() > 0) { 461 | docBuilder.field("text_content", textContent); 462 | docBuilder.field("stemmed_text_content", textContent); 463 | docBuilder.field("literal_text_content", textContent); 464 | docBuilder.field("case_sensitive_text_content", textContent); 465 | docBuilder.field("raw_text_content", textContent); 466 | 467 | List sentences = TextUtils.getSentences(textContent); 468 | addSentences(docBuilder, "text_content_sentences", sentences); 469 | addSentences(docBuilder, "stemmed_text_content_sentences", sentences); 470 | addSentences(docBuilder, "literal_text_content_sentences", sentences); 471 | addSentences(docBuilder, "case_sensitive_text_content_sentences", sentences); 472 | 473 | List paragraphs = TextUtils.getParagraphs(textContent); 474 | addParagraphs(docBuilder, "text_content_sentences", paragraphs); 475 | addParagraphs(docBuilder, "stemmed_text_content_sentences", paragraphs); 476 | addParagraphs(docBuilder, "literal_text_content_sentences", paragraphs); 477 | addParagraphs(docBuilder, "case_sensitive_text_content_sentences", paragraphs); 478 | 479 | } 480 | 481 | docBuilder.endObject(); 482 | 483 | return docBuilder; 484 | } 485 | 486 | private static void addParagraphs(XContentBuilder builder, String fieldName, List paragraphs) throws IOException { 487 | builder.startArray(fieldName); 488 | for(String paragraph : paragraphs) { 489 | builder.startObject().field("paragraph", paragraph).endObject(); 490 | } 491 | builder.endArray(); 492 | } 493 | 494 | private static void addSentences(XContentBuilder builder, String fieldName, List sentences) throws IOException { 495 | builder.startArray(fieldName); 496 | for(Sentence sentence : sentences) { 497 | builder.startObject().field("sentence", sentence.getText()).endObject(); 498 | } 499 | builder.endArray(); 500 | } 501 | } 502 | -------------------------------------------------------------------------------- 
/src/main/java/org/iptc/extra/core/types/Corpus.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.types;

import javax.xml.bind.annotation.XmlRootElement;

import org.mongodb.morphia.annotations.Entity;
import org.mongodb.morphia.annotations.Id;
import org.mongodb.morphia.annotations.Transient;

/**
 * A corpus: a named set of documents that share a schema and a taxonomy.
 *
 * @author manos schinas
 */
@Entity("corpora")
@XmlRootElement
public class Corpus {

    @Id
    protected String id;

    /** The name of the corpus. */
    protected String name;

    /** The id of the schema of the corpus; documents of the corpus must have that schema. */
    protected String schemaId;

    /** The schema object itself; marked transient, so not persisted with the corpus. */
    @Transient
    protected Schema schema;

    /** The id of the taxonomy the documents of the corpus are associated with. */
    protected String taxonomyId;

    /** The taxonomy object itself; marked transient, so not persisted with the corpus. */
    @Transient
    protected Taxonomy taxonomy;

    /** The language of the documents. */
    protected String language;

    /** The number of documents associated with the corpus. */
    protected long documents = 0;

    protected long createdAt;

    public Corpus() {

    }

    public Corpus(String id, String name, String schemaId, String taxonomyId) {
        this.id = id;
        this.name = name;
        this.schemaId = schemaId;
        this.taxonomyId = taxonomyId;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getSchemaId() {
        return this.schemaId;
    }

    public void setSchemaId(String schemaId) {
        this.schemaId = schemaId;
    }

    public Schema getSchema() {
        return this.schema;
    }

    public void setSchema(Schema schema) {
        this.schema = schema;
    }

    public String getTaxonomyId() {
        return this.taxonomyId;
    }

    public void setTaxonomyId(String taxonomyId) {
        this.taxonomyId = taxonomyId;
    }

    public Taxonomy getTaxonomy() {
        return this.taxonomy;
    }

    public void setTaxonomy(Taxonomy taxonomy) {
        this.taxonomy = taxonomy;
    }

    public String getLanguage() {
        return this.language;
    }

    public void setLanguage(String language) {
        this.language = language;
    }

    public long getDocuments() {
        return this.documents;
    }

    public void setDocuments(long documents) {
        this.documents = documents;
    }

    public long getCreatedAt() {
        return this.createdAt;
    }

    public void setCreatedAt(long createdAt) {
        this.createdAt = createdAt;
    }

}

--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/types/Dictionary.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.types;

import java.util.ArrayList;
import java.util.List;

import javax.xml.bind.annotation.XmlRootElement;

import org.mongodb.morphia.annotations.Entity;
import org.mongodb.morphia.annotations.Id;

/**
 * A dictionary: an identified list of terms in a given language.
 */
@Entity("dictionaries")
@XmlRootElement()
public class Dictionary {

    @Id
    protected String id;

    protected String language;

    // NOTE(review): the element type is not visible here (presumably String terms) - confirm before adding generics
    protected List terms = new ArrayList();

    public Dictionary() {

    }

    public Dictionary(String id) {
        this.id = id;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getLanguage() {
        return this.language;
    }

    public void setLanguage(String language) {
        this.language = language;
    }

    public List getTerms() {
        return this.terms;
    }

    public void setTerms(List terms) {
        this.terms = terms;
    }

}

--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/types/Group.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.types;

import javax.xml.bind.annotation.XmlRootElement;

import org.mongodb.morphia.annotations.Entity;
import org.mongodb.morphia.annotations.Id;

/**
 * A group: a named subset of rules.
 *
 * @author manos schinas
 */
@Entity("groups")
@XmlRootElement()
public class Group {

    @Id
    protected String id;

    protected String name;

    public Group() {

    }

    public Group(String id, String name) {
        this.id = id;
        this.name = name;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

}

--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/types/Rule.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.types;

import java.util.ArrayList;
import java.util.List;

import javax.xml.bind.annotation.XmlRootElement;

import org.mongodb.morphia.annotations.Entity;
import org.mongodb.morphia.annotations.Id;

/**
 * An extra rule, which can be used for retrieval and classification of documents.
 *
 * @author manos schinas
 */
@Entity("rules")
@XmlRootElement()
public class Rule {

    /** The unique identifier of the rule. */
    @Id
    protected String id;

    /** A representative name for the rule. */
    protected String name;

    /** The actual rule, expressed as an EQL query. */
    protected String query;

    /** The status of the rule. Can take three values: new, draft, submitted. */
    protected String status;

    /** Creation date of the rule. */
    protected long createdAt;

    /** Update date of the rule. */
    protected long updatedAt;

    /** Date of the last submission. */
    protected long submittedAt = 0;

    /** The id of the user that created the rule. */
    protected String uid;

    /** The id of the parent rule, if any. */
    protected String parentRule;

    /** The id of the taxonomy associated with the rule. */
    protected String taxonomy;

    /** The id of the topic associated with the rule. */
    protected String topicId;

    protected String topicName;

    /** A list of group ids used to split rules into subsets. */
    protected List group = new ArrayList();

    /** A list of schemas to which the rule is associated. */
    protected List schemas = new ArrayList();

    public Rule() {

    }

    public Rule(String id) {
        this.id = id;
    }

    public Rule(String id, String query) {
        this.id = id;
        this.query = query;
    }

    // Getters/ Setters

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getQuery() {
        return this.query;
    }

    public void setQuery(String query) {
        this.query = query;
    }

    public String getStatus() {
        return this.status;
    }

    public void setStatus(String status) {
        this.status = status;
    }

    public long getCreatedAt() {
        return this.createdAt;
    }

    public void setCreatedAt(long createdAt) {
        this.createdAt = createdAt;
    }

    public long getUpdatedAt() {
        return this.updatedAt;
    }

    public void setUpdatedAt(long updatedAt) {
        this.updatedAt = updatedAt;
    }

    public long getSubmittedAt() {
        return this.submittedAt;
    }

    public void setSubmittedAt(long submittedAt) {
        this.submittedAt = submittedAt;
    }

    public String getUid() {
        return this.uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    public String getParentRule() {
        return this.parentRule;
    }

    public void setParentRule(String parentRule) {
        this.parentRule = parentRule;
    }

    public String getTaxonomy() {
        return this.taxonomy;
    }

    public void setTaxonomy(String taxonomy) {
        this.taxonomy = taxonomy;
    }

    public String getTopicId() {
        return this.topicId;
    }

    public void setTopicId(String topicId) {
        this.topicId = topicId;
    }

    public String getTopicName() {
        return this.topicName;
    }

    public void setTopicName(String topicName) {
        this.topicName = topicName;
    }

    /**
     * Returns the group list, or a fresh empty list when none is set.
     * The fresh list is not stored in the rule.
     */
    public List getGroup() {
        return this.group != null ? this.group : new ArrayList();
    }

    public void setGroup(List group) {
        this.group = group;
    }

    /**
     * Returns the schema list, or a fresh empty list when none is set.
     * The fresh list is not stored in the rule.
     */
    public List getSchemas() {
        return this.schemas != null ? this.schemas : new ArrayList();
    }

    public void setSchemas(List schemas) {
        this.schemas = schemas;
    }

}

--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/types/Schema.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.types;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.xml.bind.annotation.XmlRootElement;

import org.mongodb.morphia.annotations.Entity;
import org.mongodb.morphia.annotations.Id;

/**
 * Schema class represents a set of fields, and the type of each of these fields.
 *
 * @author manos schinas
 */
@Entity("schemas")
@XmlRootElement
public class Schema {

    @Id
    protected String id;

    protected String name;

    protected String language;

    /** The set of fields; names are expected to be unique (see {@link #addField}). */
    protected List<Field> fields = new ArrayList<Field>();

    public Schema() {

    }

    public Schema(String id, String name) {
        this.id = id;
        this.name = name;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getLanguage() {
        return language;
    }

    public void setLanguage(String language) {
        this.language = language;
    }

    public List<Field> getFields() {
        return fields;
    }

    /**
     * Adds a field, or updates the flags of the field that already has this name.
     * An existing field is updated in place and keeps its position, so the list
     * never holds two entries for one name.
     *
     * @param name          the field name
     * @param textual       whether the field is textual
     * @param hasSentences  whether the field is split into sentences
     * @param hasParagraphs whether the field is split into paragraphs
     */
    public void addField(String name, boolean textual, boolean hasSentences, boolean hasParagraphs) {
        Field field = getField(name);
        if(field == null) {
            field = new Field(name, textual);
            fields.add(field);
        }
        // The Field object is shared with the list, so mutating it is the whole update;
        // re-inserting it (as done previously) duplicated the entry.
        field.textual = textual;
        field.hasSentences = hasSentences;
        field.hasParagraphs = hasParagraphs;
    }

    public void setFields(List<Field> fields) {
        this.fields = fields;
    }

    /**
     * Returns the first field with the given name.
     *
     * @param fieldName the name to look up
     * @return the matching field, or {@code null} when there is none
     */
    public Field getField(String fieldName) {
        for(Field field : fields) {
            // a Field built with the no-arg constructor has no name yet
            if(field.name != null && field.name.equals(fieldName)) {
                return field;
            }
        }
        return null;
    }

    /** Returns the names of all fields as a new set. */
    public Set<String> getFieldNames() {
        Set<String> fieldsNames = new HashSet<String>();
        for(Field field : fields) {
            fieldsNames.add(field.name);
        }
        return fieldsNames;
    }

    /** Returns the names of the fields flagged as textual, as a new set. */
    public Set<String> getTextualFieldNames() {
        Set<String> fieldsNames = new HashSet<String>();
        for(Field field : fields) {
            if(field.textual) {
                fieldsNames.add(field.name);
            }
        }
        return fieldsNames;
    }

    /**
     * A single schema field: its name and type flags.
     */
    @XmlRootElement(name = "fields")
    public static class Field {

        public String name;

        public boolean date = false;
        public boolean numeric = false;
        public boolean textual = false;         // indicates whether the field is textual or not
        public boolean hasSentences = false;    // indicates whether a textual field contains sentences
        public boolean hasParagraphs = false;   // indicates whether a textual field contains paragraphs

        public Field() {

        }

        public Field(String name, boolean textual) {
            this.name = name;
            this.textual = textual;
        }

    }

}

--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/types/Taxonomy.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.types;

import javax.xml.bind.annotation.XmlRootElement;

import org.mongodb.morphia.annotations.Entity;
import org.mongodb.morphia.annotations.Id;

/**
 * That class represents a taxonomy of topics.
 *
 * @author manos schinas
 */
@Entity("taxonomies")
@XmlRootElement()
public class Taxonomy {

    @Id
    protected String id;

    protected String name;

    protected String language;

    // NOTE(review): presumably the number of topics in the taxonomy - confirm against callers
    protected long topics;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getLanguage() {
        return language;
    }

    public void setLanguage(String language) {
        this.language = language;
    }

    public long getTopics() {
        return topics;
    }

    public void setTopics(long topics) {
        this.topics = topics;
    }

}

--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/types/Topic.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.types;

import javax.xml.bind.annotation.XmlRootElement;

import org.mongodb.morphia.annotations.Entity;
import org.mongodb.morphia.annotations.Field;
import org.mongodb.morphia.annotations.Id;
import org.mongodb.morphia.annotations.Index;
import org.mongodb.morphia.annotations.Indexes;

/**
 * That class represents a topic. An index is declared over the
 * {@code topicId} and {@code taxonomyId} fields.
 *
 * @author manos schinas
 */
@Entity("topics")
@Indexes(@Index(fields = {@Field("topicId"), @Field(value = "taxonomyId")}))
@XmlRootElement()
public class Topic {

    @Id
    protected String id;            // unique identifier, produced by the concatenation of taxonomy and topic id

    protected String topicId;       // unique topic id inside the taxonomy

    protected String name;          // the name of the topic

    protected String definition;    // a definition - description of the topic

    protected String parentTopic;   // the id of the parent of the topic (if any)

    protected String taxonomyId;    // the taxonomy to which the topic belongs

    protected String label;

    protected String url;           // a URL of the topic (if any)

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public void setTopicId(String topicId) {
        this.topicId = topicId;
    }

    public String getTopicId() {
        return topicId;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getDefinition() {
        return definition;
    }

    public void setDefinition(String definition) {
        this.definition = definition;
    }

    public String getParentTopic() {
        return parentTopic;
    }

    public void setParentTopic(String parentTopic) {
        this.parentTopic = parentTopic;
    }

    public String getTaxonomyId() {
        return taxonomyId;
    }

    public void setTaxonomyId(String taxonomyId) {
        this.taxonomyId = taxonomyId;
    }

    public String getLabel() {
        return label;
    }

    public void setLabel(String label) {
        this.label = label;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/iptc/extra/core/types/document/Document.java:
--------------------------------------------------------------------------------
package org.iptc.extra.core.types.document;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.xml.bind.annotation.XmlRootElement;

import org.iptc.extra.core.types.Schema;

import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

@XmlRootElement() 17 | public class Document extends HashMap { 18 | 19 | /** 20 | * 21 | */ 22 | private static final long serialVersionUID = -2274574518570705042L; 23 | 24 | private String id; 25 | 26 | private List topics = new ArrayList(); 27 | 28 | public Document() { 29 | 30 | } 31 | 32 | public Document(String id) { 33 | this.id = id; 34 | } 35 | 36 | public String getId() { 37 | return this.id; 38 | } 39 | 40 | public void setId(String id) { 41 | this.id = id; 42 | } 43 | 44 | public void addField(String key, DocumentField value) { 45 | this.put(key, value); 46 | } 47 | 48 | public void addField(String key, String value) { 49 | DocumentField field = new TextField(value); 50 | this.put(key, field); 51 | } 52 | 53 | public Set getFieldNames() { 54 | Set fields = new HashSet(this.keySet()); 55 | return fields; 56 | } 57 | 58 | public boolean containsField(String field) { 59 | return this.containsKey(field); 60 | } 61 | 62 | public List getTopics() { 63 | return topics; 64 | } 65 | 66 | public void setTopics(List topics) { 67 | this.topics = topics; 68 | } 69 | 70 | public boolean matchSchema(Schema schema) { 71 | 72 | Set schemaFields = schema.getFieldNames(); 73 | Set documentFields = getFieldNames(); 74 | documentFields.removeAll(schemaFields); 75 | 76 | if(documentFields.isEmpty()) { 77 | return true; 78 | } 79 | 80 | return false; 81 | } 82 | 83 | public JsonElement toJson() { 84 | JsonObject json = new JsonObject(); 85 | for(String fieldName : keySet()) { 86 | DocumentField field = this.get(fieldName); 87 | json.add(fieldName, field.toJson()); 88 | } 89 | return json; 90 | } 91 | 92 | public String toString() { 93 | JsonElement json = toJson(); 94 | return json.toString(); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/types/document/DocumentField.java: -------------------------------------------------------------------------------- 1 | package 
/**
 * Mutable bean holding the data of a topic as referenced from a document:
 * topic id, URL, association, parent topic, name and an exclusion flag.
 */
public class DocumentTopic {

    private String topicId;

    private String url;

    private String association;

    private String parentTopic;

    private String name;

    private boolean exclude;

    public String getTopicId() {
        return topicId;
    }

    public void setTopicId(String topicId) {
        this.topicId = topicId;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getAssociation() {
        return association;
    }

    public void setAssociation(String association) {
        this.association = association;
    }

    public String getParentTopic() {
        return parentTopic;
    }

    public void setParentTopic(String parentTopic) {
        this.parentTopic = parentTopic;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public boolean isExclude() {
        return exclude;
    }

    /**
     * Sets the exclusion flag. This is the correctly named counterpart of
     * {@link #isExclude()}, so the pair forms a regular bean property.
     *
     * @param exclude the new value of the flag
     */
    public void setExclude(boolean exclude) {
        this.exclude = exclude;
    }

    /**
     * Misspelled setter kept so that existing callers keep compiling.
     *
     * @param exclude the new value of the flag
     * @deprecated use {@link #setExclude(boolean)} instead
     */
    @Deprecated
    public void setExlude(boolean exclude) {
        setExclude(exclude);
    }

}
| import com.google.gson.JsonElement; 7 | import com.google.gson.JsonObject; 8 | 9 | public class NestedField extends DocumentField { 10 | 11 | private Map subfields = new HashMap(); 12 | 13 | public void addField(String fieldName, DocumentField fieldValue) { 14 | subfields.put(fieldName, fieldValue); 15 | } 16 | 17 | public void addField(String fieldName, String fieldValue) { 18 | DocumentField field = new TextField(fieldValue); 19 | subfields.put(fieldName, field); 20 | } 21 | 22 | public JsonElement toJson() { 23 | JsonObject json = new JsonObject(); 24 | for(String fieldName : subfields.keySet()) { 25 | DocumentField field = subfields.get(fieldName); 26 | json.add(fieldName, field.toJson()); 27 | } 28 | return json; 29 | } 30 | 31 | @Override 32 | public String toString() { 33 | return toJson().toString(); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/types/document/Paragraph.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.types.document; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.iptc.extra.core.utils.TextUtils; 7 | 8 | public class Paragraph { 9 | 10 | private String paragraph; 11 | private List sentences = new ArrayList(); 12 | 13 | public Paragraph(String paragraph) { 14 | this.paragraph = paragraph; 15 | sentences = TextUtils.getSentences(paragraph); 16 | } 17 | 18 | public Sentence getSentence(int index) { 19 | return sentences.get(index); 20 | } 21 | 22 | public int getNumberOfSentences() { 23 | return sentences.size(); 24 | } 25 | 26 | public List getSentences() { 27 | return sentences; 28 | } 29 | 30 | public void setSentences(List sentences) { 31 | this.sentences = sentences; 32 | } 33 | 34 | public String toString() { 35 | return paragraph; 36 | } 37 | 38 | public String getParagraph() { 39 | return paragraph; 40 | } 41 | 42 | public void 
/**
 * A single sentence of text.
 */
public class Sentence {

    private String text;

    /**
     * Creates a sentence holding the given text.
     *
     * @param text the text of the sentence
     */
    public Sentence(String text) {
        this.text = text;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    /**
     * Renders the sentence as its text, in line with the other text holders of
     * this package, which render their content rather than an object identity.
     *
     * @return the text of the sentence, or the string {@code "null"} if no
     *         text is set; never {@code null}
     */
    @Override
    public String toString() {
        return String.valueOf(text);
    }

}
for(Paragraph paragraph : paragraphs) { 46 | arr.add(paragraph.toString()); 47 | } 48 | return arr; 49 | } 50 | 51 | public String toString() { 52 | return getValue(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/types/document/TextField.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.types.document; 2 | 3 | import com.google.gson.JsonElement; 4 | import com.google.gson.JsonPrimitive; 5 | 6 | public class TextField extends DocumentField { 7 | 8 | protected String value; 9 | 10 | public TextField() { 11 | 12 | } 13 | 14 | public TextField(String value) { 15 | this.value = value; 16 | } 17 | 18 | public String getValue() { 19 | return value; 20 | } 21 | 22 | public void setValue(String value) { 23 | this.value = value; 24 | } 25 | 26 | public JsonElement toJson() { 27 | return new JsonPrimitive(value); 28 | } 29 | 30 | public String toString() { 31 | return value; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/org/iptc/extra/core/utils/TextUtils.java: -------------------------------------------------------------------------------- 1 | package org.iptc.extra.core.utils; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Properties; 6 | 7 | import org.apache.commons.lang3.StringEscapeUtils; 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.iptc.extra.core.types.document.Sentence; 10 | import org.jsoup.Jsoup; 11 | import org.jsoup.nodes.Element; 12 | import org.jsoup.select.Elements; 13 | 14 | import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; 15 | import edu.stanford.nlp.pipeline.Annotation; 16 | import edu.stanford.nlp.pipeline.StanfordCoreNLP; 17 | import edu.stanford.nlp.util.CoreMap; 18 | 19 | /* 20 | * Static methods for text pre-processing 21 | */ 22 | public class TextUtils { 
23 | 24 | /* 25 | * Remove HTML tags and redundant white-spaces for a text 26 | */ 27 | public static String clean(String txt) { 28 | 29 | //txt = txt.replaceAll("", " ").replaceAll("<[^>]+>", " "); 30 | txt = Jsoup.parse(txt).text(); 31 | 32 | txt = StringEscapeUtils.unescapeHtml4(txt); 33 | txt = StringUtils.normalizeSpace(txt); 34 | txt = StringUtils.trim(txt); 35 | 36 | return txt; 37 | } 38 | 39 | /* 40 | * Extract a list of sentences from a given text, using Stanford NLP 41 | */ 42 | public static List getSentences(String text) { 43 | 44 | Properties props = new Properties(); 45 | props.put("annotators", "tokenize, ssplit"); 46 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props); 47 | 48 | List sentences = new ArrayList(); 49 | Annotation document = new Annotation(text); 50 | pipeline.annotate(document); 51 | List nlpSentences = document.get(SentencesAnnotation.class); 52 | for(CoreMap sentence: nlpSentences) { 53 | Sentence s = new Sentence(sentence.toString()); 54 | sentences.add(s); 55 | } 56 | 57 | return sentences; 58 | } 59 | 60 | /* 61 | * Extract a list of paragraphs from a given text. Paragraphs are enclosed into

    ...

    tags 62 | */ 63 | public static List getParagraphs(String text) { 64 | List paragraphs = new ArrayList(); 65 | 66 | org.jsoup.nodes.Document doc = Jsoup.parse(text); 67 | Elements pElements = doc.select("p"); 68 | for (Element pElement : pElements) { 69 | String paragraph = pElement.text(); 70 | paragraphs.add(paragraph); 71 | } 72 | return paragraphs; 73 | } 74 | 75 | } 76 | --------------------------------------------------------------------------------