├── src └── main │ ├── resources │ ├── proj1_schema.png │ └── proj2_schema.png │ └── java │ ├── Main.java │ ├── Subquery.java │ ├── ResultDocument.java │ ├── Query.java │ └── TinySearchEngine.java ├── .github └── workflows │ └── workflow.yml ├── LICENSE ├── pom.xml ├── README.md └── .gitignore /src/main/resources/proj1_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimoneStefani/simple-search-engine/HEAD/src/main/resources/proj1_schema.png -------------------------------------------------------------------------------- /src/main/resources/proj2_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SimoneStefani/simple-search-engine/HEAD/src/main/resources/proj2_schema.png -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Set up JDK 1.8 13 | uses: actions/setup-java@v1 14 | with: 15 | java-version: 1.8 16 | - name: Build with Maven 17 | run: mvn package --file pom.xml 18 | -------------------------------------------------------------------------------- /src/main/java/Main.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Main.java 3 | * 4 | * Created by S. Stefani on 2016-11-24. 5 | */ 6 | 7 | import se.kth.id1020.Driver; 8 | import se.kth.id1020.TinySearchEngineBase; 9 | 10 | public class Main { 11 | public static void main(String[] args) throws Exception { 12 | TinySearchEngineBase searchEngine = new TinySearchEngine(); 13 | Driver.run(searchEngine); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Simone Stefani 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.github.SimoneStefani.simple-search-engine 8 | simple-search-engine 9 | 1.0 10 | 11 | 12 | 13 | se.kth.id1020 14 | tinySearchEngine 15 | 2.0 16 | 17 | 18 | edu.princeton.cs.introcs 19 | algs4-package 20 | 1.0 21 | 22 | 23 | edu.princeton.cs.introcs 24 | stdlib-package 25 | 1.0 26 | 27 | 28 | 29 | 30 | sics−release 31 | SICS Release Repository 32 | http://kompics.sics.se/maven/repository 33 | 34 | 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #### Royal Institute of Technology KTH - Stockholm 2 | 3 | # Simple Search Engine 4 | 5 |

6 | GitHub Actions status 7 |

8 | 9 | A simple search engine to index a corpus of documents and search for words with specific query paramteres. 10 | This project is part of the course ID1020 Algorithms and Data Structures. 11 | 12 | _This repository contains code written during the fall semester 2016 by Simone Stefani_ 13 | 14 | ### Structure 15 | 16 | ![alt text](https://github.com/SimoneStefani/simple-search-engine/blob/master/src/main/resources/proj2_schema.png) 17 | 18 | ### Description 19 | 20 | - **Index**: a HashMap that contains all the indexed words as word-list_of_postings key-value pairs. 21 | - **ResultDocument**: an object that links a word (or a set of word) with a document that contains it. It refers to a specific `document` and carries properties related to the words such as `hits`, `populairty` and `relevance` (as tf-idf). 22 | 23 | The search engine contains other two HashMaps: 24 | 25 | - **DocumentsLength**: keeps track of the length of each processed document. 26 | - **Cache**: contains cached queries 27 | 28 | _The the postings (resultDocuments) for each word are sorted dynamically at insertion. Consequently they can be retrieved through binary search._ 29 | 30 | When the user input query string is processed a parsedQuery is returned in the form of nested sub-query objects. Consequently when searching for a complex query, the parsedQuery can be analysed recursively and the fundamental queries can be then combined with operators. 31 | -------------------------------------------------------------------------------- /src/main/java/Subquery.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Subquery.java 3 | * 4 | * Created by S. Stefani on 2017-01-06. 5 | */ 6 | 7 | public class Subquery implements Comparable { 8 | public Comparable leftTerm = null; 9 | public Comparable rightTerm = null; 10 | public String operator; 11 | public String orderedQuery; 12 | 13 | /** 14 | * Construct a sub-query formed only by one element. 15 | * 16 | * @param leftTerm is the term in the sub-query. 17 | */ 18 | public Subquery(Comparable leftTerm) { 19 | this.leftTerm = leftTerm; 20 | this.orderedQuery = leftTerm.toString(); 21 | } 22 | 23 | /** 24 | * Construct a sub-query formed by a triplet of elements. 25 | * 26 | * @param leftTerm is the left-hand term in the sub-query 27 | * @param operator is the element that connects two terms of the sub-query 28 | * @param rightTerm is the right-hand term in the sub-query 29 | */ 30 | public Subquery(Comparable leftTerm, String operator, Comparable rightTerm) { 31 | 32 | this.leftTerm = leftTerm; 33 | this.operator = operator; 34 | this.rightTerm = rightTerm; 35 | } 36 | 37 | /** 38 | * Generate a string with the infix notation of the sub-query. 39 | * 40 | * @return infix sub-query 41 | */ 42 | @Override 43 | public String toString() { 44 | if (rightTerm == null) return leftTerm.toString(); 45 | 46 | return "(" + leftTerm.toString() + " " + operator + " " + rightTerm.toString() + ")"; 47 | } 48 | 49 | /** 50 | * Simple comparator for two sub-queries. 51 | * 52 | * @param o is the sub-query to compare to 53 | * @return usual comparator integer result (-1/0/1) 54 | */ 55 | public int compareTo(String o) { 56 | return this.toString().compareTo(o); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/ResultDocument.java: -------------------------------------------------------------------------------- 1 | /** 2 | * ResultDocument.java 3 | * 4 | * Created by S. Stefani on 2017-01-06. 5 | */ 6 | 7 | import se.kth.id1020.util.Document; 8 | 9 | import java.util.Comparator; 10 | import java.util.HashMap; 11 | 12 | public class ResultDocument implements Comparable { 13 | private Document document; 14 | private int hits; 15 | private int popularity; 16 | private double relevance; 17 | 18 | public ResultDocument(Document document, int hits) { 19 | this.document = document; 20 | this.hits = hits; 21 | this.popularity = document.popularity; 22 | } 23 | 24 | public ResultDocument(Document document, double relevance) { 25 | this.relevance = relevance; 26 | this.document = document; 27 | this.popularity = document.popularity; 28 | } 29 | 30 | /** 31 | * Compute the relevance of posting respect to the executed query by means 32 | * of tf-idf. 33 | * 34 | * @param documentsLengths contains the lengths of all the documents 35 | * @param relevantDocs is the number of relevant docs for the query 36 | */ 37 | public void computeRelevance(HashMap documentsLengths, int relevantDocs) { 38 | relevance = tf(documentsLengths.get(document.name)) * idf(documentsLengths.size(), relevantDocs); 39 | } 40 | 41 | // Compute tf 42 | private double tf(int totalTerms) { 43 | return (double) this.hits / totalTerms; 44 | } 45 | 46 | // Compute idf 47 | private double idf(int totalDocs, int relevantDocs) { 48 | return Math.log10((double) totalDocs / (double) relevantDocs); 49 | } 50 | 51 | // Increment the number of hits 52 | public void updatePosting() { 53 | this.hits++; 54 | } 55 | 56 | public Document getDocument() { 57 | return document; 58 | } 59 | 60 | public int getHits() { 61 | return hits; 62 | } 63 | 64 | public int getPopularity() { 65 | return popularity; 66 | } 67 | 68 | public double getRelevance() { 69 | return relevance; 70 | } 71 | 72 | @Override 73 | public boolean equals(Object o) { 74 | if (this == o) return true; 75 | if (o == null || getClass() != o.getClass()) return false; 76 | 77 | ResultDocument that = (ResultDocument) o; 78 | 79 | return this.document.name.equals(that.document.name); 80 | } 81 | 82 | public int compareTo(ResultDocument o) { 83 | return this.document.name.compareTo(o.document.name); 84 | } 85 | 86 | public static class PopularityComparator implements Comparator { 87 | private int direction; 88 | 89 | public PopularityComparator(int direction) { 90 | this.direction = direction; 91 | } 92 | 93 | public int compare(ResultDocument o1, ResultDocument o2) { 94 | if (o1.getPopularity() < o2.getPopularity()) return -1 * direction; 95 | if (o1.getPopularity() > o2.getPopularity()) return direction; 96 | return 0; 97 | } 98 | } 99 | 100 | public static class RelevanceComparator implements Comparator { 101 | private int direction; 102 | 103 | public RelevanceComparator(int direction) { 104 | this.direction = direction; 105 | } 106 | 107 | public int compare(ResultDocument o1, ResultDocument o2) { 108 | if (o1.getRelevance() < o2.getRelevance()) return -1 * direction; 109 | if (o1.getRelevance() > o2.getRelevance()) return direction; 110 | return 0; 111 | } 112 | } 113 | } -------------------------------------------------------------------------------- /src/main/java/Query.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Query.java 3 | * 4 | * Created by S. Stefani on 2016-12-02. 5 | */ 6 | 7 | import edu.princeton.cs.algs4.Stack; 8 | 9 | public class Query { 10 | private Subquery parsedQuery; 11 | private String property; 12 | private int direction; 13 | 14 | /** 15 | * Receives the user input and run a parser on it. 16 | * 17 | * @param queryString the user input query string 18 | */ 19 | public Query(String queryString) { 20 | parseQuery(queryString); 21 | } 22 | 23 | /** 24 | * Parse a query string separating the composing elements: 25 | * the parsed query (in form of sub-query), the sorting property and direction. 26 | * 27 | * @param str is the query to parse 28 | */ 29 | private void parseQuery(String str) { 30 | String[] parts = str.split("orderby"); 31 | String[] elements = parts[0].split("\\s+"); 32 | 33 | // Use two-stack algorithm to parse prefix notation 34 | Stack> terms = new Stack>(); 35 | Stack> helper = new Stack>(); 36 | 37 | for (String el : elements) { terms.push(el); } 38 | while (!terms.isEmpty()) { 39 | Comparable term = terms.pop(); 40 | String operands = "+|-"; 41 | if (operands.contains(term.toString())) { 42 | Comparable leftSide = helper.pop(); 43 | Comparable rightSide = helper.pop(); 44 | helper.push(new Subquery(leftSide, term.toString(), rightSide)); 45 | } else { 46 | helper.push(term); 47 | } 48 | } 49 | 50 | Comparable resultQuery = helper.pop(); 51 | parsedQuery = resultQuery instanceof String ? new Subquery(resultQuery) : (Subquery) resultQuery; 52 | computeUniqueNotation(parsedQuery); 53 | 54 | if (parts.length < 2) { 55 | return; 56 | } 57 | 58 | // Parse sorting properties 59 | if (parts[1].contains("relevance")) { 60 | property = "RELEVANCE"; 61 | } else if (parts[1].contains("popularity")) { 62 | property = "POPULARITY"; 63 | } 64 | 65 | if (parts[1].contains("asc")) { 66 | direction = 1; 67 | } else if (parts[1].contains("desc")) { 68 | direction = -1; 69 | } 70 | } 71 | 72 | /** 73 | * Analyse a query (sub-query object) and generates a unique notation for each 74 | * of the composing elements. This notation ensure that if two terms are connected 75 | * by a commutative operator, they are also ordered alphabetically in the sub-query. 76 | * This is important to allow the caching system to work with commutative queries. 77 | * 78 | * @param parsedQuery is the parsed version of the user query 79 | * @return an ordered string version of the query 80 | */ 81 | private String computeUniqueNotation(Subquery parsedQuery) { 82 | if (parsedQuery.rightTerm == null) { 83 | parsedQuery.orderedQuery = parsedQuery.leftTerm.toString(); 84 | return parsedQuery.leftTerm.toString(); 85 | } 86 | 87 | String leftBare = computeUniqueNotation(parsedQuery.leftTerm instanceof Subquery ? (Subquery) parsedQuery.leftTerm : new Subquery(parsedQuery.leftTerm)); 88 | String rightBare = computeUniqueNotation(parsedQuery.rightTerm instanceof Subquery ? (Subquery) parsedQuery.rightTerm : new Subquery(parsedQuery.rightTerm)); 89 | 90 | String operator = parsedQuery.operator; 91 | String ordered; 92 | 93 | if (operator.equals("|") || operator.equals("+")) { 94 | if (leftBare.compareTo(rightBare) > 0) { 95 | ordered = rightBare + " " + leftBare + " " + operator; 96 | } else { 97 | ordered = leftBare + " " + rightBare + " " + operator; 98 | } 99 | } else { 100 | ordered = leftBare + " " + rightBare + " " + operator; 101 | } 102 | parsedQuery.orderedQuery = ordered; 103 | return ordered; 104 | } 105 | 106 | public Subquery getParsedQuery() { return parsedQuery; } 107 | 108 | public String getProperty() { return property; } 109 | 110 | public int getDirection() { return direction; } 111 | } 112 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io/api/java,maven,intellij+all 2 | # Edit at https://www.gitignore.io/?templates=java,maven,intellij+all 3 | 4 | ### Intellij+all ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # Generated files 16 | .idea/**/contentModel.xml 17 | 18 | # Sensitive or high-churn files 19 | .idea/**/dataSources/ 20 | .idea/**/dataSources.ids 21 | .idea/**/dataSources.local.xml 22 | .idea/**/sqlDataSources.xml 23 | .idea/**/dynamic.xml 24 | .idea/**/uiDesigner.xml 25 | .idea/**/dbnavigator.xml 26 | 27 | # Gradle 28 | .idea/**/gradle.xml 29 | .idea/**/libraries 30 | 31 | # Gradle and Maven with auto-import 32 | # When using Gradle or Maven with auto-import, you should exclude module files, 33 | # since they will be recreated, and may cause churn. Uncomment if using 34 | # auto-import. 35 | # .idea/modules.xml 36 | # .idea/*.iml 37 | # .idea/modules 38 | # *.iml 39 | # *.ipr 40 | 41 | # CMake 42 | cmake-build-*/ 43 | 44 | # Mongo Explorer plugin 45 | .idea/**/mongoSettings.xml 46 | 47 | # File-based project format 48 | *.iws 49 | 50 | # IntelliJ 51 | out/ 52 | 53 | # mpeltonen/sbt-idea plugin 54 | .idea_modules/ 55 | 56 | # JIRA plugin 57 | atlassian-ide-plugin.xml 58 | 59 | # Cursive Clojure plugin 60 | .idea/replstate.xml 61 | 62 | # Crashlytics plugin (for Android Studio and IntelliJ) 63 | com_crashlytics_export_strings.xml 64 | crashlytics.properties 65 | crashlytics-build.properties 66 | fabric.properties 67 | 68 | # Editor-based Rest Client 69 | .idea/httpRequests 70 | 71 | # Android studio 3.1+ serialized cache file 72 | .idea/caches/build_file_checksums.ser 73 | 74 | ### Intellij+all Patch ### 75 | # Ignores the whole .idea folder and all .iml files 76 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 77 | 78 | .idea/ 79 | 80 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 81 | 82 | *.iml 83 | modules.xml 84 | .idea/misc.xml 85 | *.ipr 86 | 87 | # Sonarlint plugin 88 | .idea/sonarlint 89 | 90 | ### Java ### 91 | # Compiled class file 92 | *.class 93 | 94 | # Log file 95 | *.log 96 | 97 | # BlueJ files 98 | *.ctxt 99 | 100 | # Mobile Tools for Java (J2ME) 101 | .mtj.tmp/ 102 | 103 | # Package Files # 104 | *.jar 105 | *.war 106 | *.nar 107 | *.ear 108 | *.zip 109 | *.tar.gz 110 | *.rar 111 | 112 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 113 | hs_err_pid* 114 | 115 | ### Maven ### 116 | target/ 117 | pom.xml.tag 118 | pom.xml.releaseBackup 119 | pom.xml.versionsBackup 120 | pom.xml.next 121 | release.properties 122 | dependency-reduced-pom.xml 123 | buildNumber.properties 124 | .mvn/timing.properties 125 | .mvn/wrapper/maven-wrapper.jar 126 | 127 | # End of https://www.gitignore.io/api/java,maven,intellij+all 128 | 129 | 130 | # Created by https://www.gitignore.io/api/eclipse 131 | # Edit at https://www.gitignore.io/?templates=eclipse 132 | 133 | ### Eclipse ### 134 | .metadata 135 | bin/ 136 | tmp/ 137 | *.tmp 138 | *.bak 139 | *.swp 140 | *~.nib 141 | local.properties 142 | .settings/ 143 | .loadpath 144 | .recommenders 145 | 146 | # External tool builders 147 | .externalToolBuilders/ 148 | 149 | # Locally stored "Eclipse launch configurations" 150 | *.launch 151 | 152 | # PyDev specific (Python IDE for Eclipse) 153 | *.pydevproject 154 | 155 | # CDT-specific (C/C++ Development Tooling) 156 | .cproject 157 | 158 | # CDT- autotools 159 | .autotools 160 | 161 | # Java annotation processor (APT) 162 | .factorypath 163 | 164 | # PDT-specific (PHP Development Tools) 165 | .buildpath 166 | 167 | # sbteclipse plugin 168 | .target 169 | 170 | # Tern plugin 171 | .tern-project 172 | 173 | # TeXlipse plugin 174 | .texlipse 175 | 176 | # STS (Spring Tool Suite) 177 | .springBeans 178 | 179 | # Code Recommenders 180 | .recommenders/ 181 | 182 | # Annotation Processing 183 | .apt_generated/ 184 | 185 | # Scala IDE specific (Scala & Java development for Eclipse) 186 | .cache-main 187 | .scala_dependencies 188 | .worksheet 189 | 190 | ### Eclipse Patch ### 191 | # Eclipse Core 192 | .project 193 | 194 | # JDT-specific (Eclipse Java Development Tools) 195 | .classpath 196 | 197 | # Annotation Processing 198 | .apt_generated 199 | 200 | .sts4-cache/ 201 | 202 | # End of https://www.gitignore.io/api/eclipse -------------------------------------------------------------------------------- /src/main/java/TinySearchEngine.java: -------------------------------------------------------------------------------- 1 | /** 2 | * TinySearchEngine.java 3 | * 4 | * Created by S. Stefani on 2016-11-24. 5 | */ 6 | 7 | import se.kth.id1020.TinySearchEngineBase; 8 | import se.kth.id1020.util.Attributes; 9 | import se.kth.id1020.util.Document; 10 | import se.kth.id1020.util.Sentence; 11 | import se.kth.id1020.util.Word; 12 | import java.util.*; 13 | 14 | public class TinySearchEngine implements TinySearchEngineBase { 15 | private HashMap> index; 16 | private HashMap documentsLengths; 17 | private HashMap> cache; 18 | 19 | 20 | public TinySearchEngine() { 21 | this.index = new HashMap>(); 22 | this.documentsLengths = new HashMap(); 23 | this.cache = new HashMap>(); 24 | } 25 | 26 | public void preInserts() { 27 | System.out.println("Executing pre-insert..."); 28 | } 29 | 30 | /** 31 | * Insert all the words of a sentence in the index. 32 | * 33 | * @param sentence is the current sentence 34 | * @param attributes contain the parent document of the sentence 35 | */ 36 | public void insert(Sentence sentence, Attributes attributes) { 37 | for (Word word : sentence.getWords()) { 38 | // Add word to index if not in 39 | if (!index.containsKey(word.word)) { 40 | index.put(word.word, new ArrayList()); 41 | } 42 | 43 | // Create new posting 44 | ArrayList postingList = index.get(word.word); 45 | ResultDocument newPosting = new ResultDocument(attributes.document, 1); 46 | 47 | int ind = Collections.binarySearch(postingList, newPosting); 48 | 49 | // Update posting if existent or add 50 | if (ind < 0) { 51 | postingList.add(-ind-1, newPosting); 52 | } else { 53 | postingList.get(ind).updatePosting(); 54 | } 55 | } 56 | 57 | // Compute and store lengths of documents 58 | Integer sentenceLength = sentence.getWords().size(); 59 | if (documentsLengths.containsKey(attributes.document.name)) { 60 | sentenceLength += documentsLengths.get(attributes.document.name); 61 | } 62 | documentsLengths.put(attributes.document.name, sentenceLength); 63 | } 64 | 65 | public void postInserts() { 66 | System.out.println("Executing post-insert..."); 67 | } 68 | 69 | /** 70 | * Parse a user query and search for all the elements that satisfy such query. 71 | * Order the results according to the user input. 72 | * 73 | * @param s is the input query string 74 | * @return the list of docs that satisfy the query 75 | */ 76 | public List search(String s) { 77 | // Parse query 78 | Query query = new Query(s); 79 | 80 | // Compute Array of result 81 | ArrayList result = runQuery(query.getParsedQuery()); 82 | if (result == null) { return null; } 83 | 84 | // If sorting is specified use comparator to sort 85 | if (query.getProperty() != null && query.getProperty().equals("POPULARITY")) { 86 | Collections.sort(result, new ResultDocument.PopularityComparator(query.getDirection())); 87 | } else if (query.getProperty() != null && query.getProperty().equals("RELEVANCE")) { 88 | Collections.sort(result, new ResultDocument.RelevanceComparator(query.getDirection())); 89 | } 90 | 91 | // Convert into list of documents 92 | List documentList = new LinkedList(); 93 | for (ResultDocument rd : result) { documentList.add(rd.getDocument()); } 94 | 95 | return documentList; 96 | } 97 | 98 | /** 99 | * Recursively analyse the query and compute the results considering the query operators. 100 | * 101 | * @param subQ is the sub-query object (result of the query parsing) 102 | * @return an array list of documents 103 | */ 104 | private ArrayList runQuery(Subquery subQ) { 105 | if (subQ.rightTerm == null) { 106 | 107 | if (!index.containsKey(subQ.leftTerm)) return new ArrayList(); 108 | ArrayList list = new ArrayList(); 109 | for (ResultDocument value : index.get(subQ.leftTerm)) { 110 | ResultDocument newRD = new ResultDocument(value.getDocument(), value.getHits()); 111 | newRD.computeRelevance(documentsLengths, index.get(subQ.leftTerm).size()); 112 | list.add(newRD); 113 | } 114 | 115 | return list; 116 | } 117 | 118 | // Check if the query is cached 119 | if (cache.containsKey(subQ.orderedQuery)) { 120 | // System.out.println("Cache hit: " + subQ.toString()); 121 | return cache.get(subQ.orderedQuery); 122 | } 123 | 124 | ArrayList leftResult = runQuery(subQ.leftTerm instanceof Subquery ? (Subquery) subQ.leftTerm : new Subquery(subQ.leftTerm)); 125 | ArrayList rightResult = runQuery(subQ.rightTerm instanceof Subquery ? (Subquery) subQ.rightTerm : new Subquery(subQ.rightTerm)); 126 | String operator = subQ.operator; 127 | 128 | // Run query operations (union, intersection, difference) 129 | ArrayList result; 130 | if (operator.equals("+")) { 131 | result = resultIntersection(leftResult, rightResult); 132 | } else if (operator.equals("|")) { 133 | result = resultUnion(leftResult, rightResult); 134 | } else { 135 | result = resultDifference(leftResult, rightResult); 136 | } 137 | 138 | // Cache the result 139 | cache.put(subQ.orderedQuery, result); 140 | // System.out.println("Add to cache: " + subQ.toString()); 141 | 142 | return result; 143 | } 144 | 145 | // Compute intersection of two queries 146 | private ArrayList resultIntersection(ArrayList l, ArrayList r) { 147 | ArrayList result = new ArrayList(); 148 | for (ResultDocument rd : l) { 149 | int ind = Collections.binarySearch(r, rd); 150 | if (ind >= 0) { 151 | result.add(merge(rd, r.get(ind))); 152 | } 153 | } 154 | 155 | return result; 156 | } 157 | 158 | // Compute union of two queries 159 | private ArrayList resultUnion(ArrayList l, ArrayList r) { 160 | ArrayList result = new ArrayList(); 161 | result.addAll(l); 162 | for (ResultDocument rd : r) { 163 | int ind = Collections.binarySearch(result, rd); 164 | if (ind >= 0) { 165 | result.set(ind, merge(result.get(ind), rd)); 166 | } else { 167 | result.add(-ind-1, rd); 168 | } 169 | } 170 | 171 | return result; 172 | } 173 | 174 | // Compute difference of two queries 175 | private ArrayList resultDifference(ArrayList l, ArrayList r) { 176 | ArrayList result = new ArrayList(); 177 | 178 | for (ResultDocument rd : l) { 179 | if (!r.contains(rd)) { result.add(rd); } 180 | } 181 | 182 | return result; 183 | } 184 | 185 | private ResultDocument merge(ResultDocument u, ResultDocument v) { 186 | return new ResultDocument(u.getDocument(), u.getRelevance() + v.getRelevance()); 187 | } 188 | 189 | /** 190 | * Output the infix version of the query string (useful to check correctness of parser) 191 | * 192 | * @param s is the user query string 193 | * @return the infix version of query 194 | */ 195 | public String infix(String s) { 196 | Query query = new Query(s); 197 | String dir = query.getDirection() == 1 ? "asc" : "desc"; 198 | return query.getParsedQuery().toString() + " orderby " + query.getProperty().toLowerCase() + " " + dir; 199 | } 200 | } --------------------------------------------------------------------------------