├── src
└── main
│ ├── resources
│ ├── proj1_schema.png
│ └── proj2_schema.png
│ └── java
│ ├── Main.java
│ ├── Subquery.java
│ ├── ResultDocument.java
│ ├── Query.java
│ └── TinySearchEngine.java
├── .github
└── workflows
│ └── workflow.yml
├── LICENSE
├── pom.xml
├── README.md
└── .gitignore
/src/main/resources/proj1_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimoneStefani/simple-search-engine/HEAD/src/main/resources/proj1_schema.png
--------------------------------------------------------------------------------
/src/main/resources/proj2_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SimoneStefani/simple-search-engine/HEAD/src/main/resources/proj2_schema.png
--------------------------------------------------------------------------------
/.github/workflows/workflow.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 |
8 | runs-on: ubuntu-latest
9 |
10 | steps:
11 | - uses: actions/checkout@v1
12 | - name: Set up JDK 1.8
13 | uses: actions/setup-java@v1
14 | with:
15 | java-version: 1.8
16 | - name: Build with Maven
17 | run: mvn package --file pom.xml
18 |
--------------------------------------------------------------------------------
/src/main/java/Main.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Main.java
3 | *
4 | * Created by S. Stefani on 2016-11-24.
5 | */
6 |
7 | import se.kth.id1020.Driver;
8 | import se.kth.id1020.TinySearchEngineBase;
9 |
10 | public class Main {
11 | public static void main(String[] args) throws Exception {
12 | TinySearchEngineBase searchEngine = new TinySearchEngine();
13 | Driver.run(searchEngine);
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Simone Stefani
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | com.github.SimoneStefani.simple-search-engine
8 | simple-search-engine
9 | 1.0
10 |
11 |
12 |
13 | se.kth.id1020
14 | tinySearchEngine
15 | 2.0
16 |
17 |
18 | edu.princeton.cs.introcs
19 | algs4-package
20 | 1.0
21 |
22 |
23 | edu.princeton.cs.introcs
24 | stdlib-package
25 | 1.0
26 |
27 |
28 |
29 |
30 | sics−release
31 | SICS Release Repository
32 | http://kompics.sics.se/maven/repository
33 |
34 |
35 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | #### Royal Institute of Technology KTH - Stockholm
2 |
3 | # Simple Search Engine
4 |
5 |
6 |
7 |
8 |
9 | A simple search engine to index a corpus of documents and search for words with specific query parameters.
10 | This project is part of the course ID1020 Algorithms and Data Structures.
11 |
12 | _This repository contains code written during the fall semester 2016 by Simone Stefani_
13 |
14 | ### Structure
15 |
16 | 
17 |
18 | ### Description
19 |
20 | - **Index**: a HashMap that contains all the indexed words as word-list_of_postings key-value pairs.
21 | - **ResultDocument**: an object that links a word (or a set of words) with a document that contains it. It refers to a specific `document` and carries properties related to the words such as `hits`, `popularity` and `relevance` (as tf-idf).
22 |
23 | The search engine contains two other HashMaps:
24 |
25 | - **DocumentsLength**: keeps track of the length of each processed document.
26 | - **Cache**: contains cached queries
27 |
28 | _The postings (resultDocuments) for each word are sorted dynamically at insertion. Consequently they can be retrieved through binary search._
29 |
30 | When the user input query string is processed, a parsedQuery is returned in the form of nested sub-query objects. Consequently, when searching for a complex query, the parsedQuery can be analysed recursively and the fundamental queries can then be combined with operators.
31 |
--------------------------------------------------------------------------------
/src/main/java/Subquery.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Subquery.java
3 | *
4 | * Created by S. Stefani on 2017-01-06.
5 | */
6 |
/**
 * A node of a parsed query: either a single term, or two terms joined by an operator.
 * A term is a plain {@code String} or another {@code Subquery}, which is why terms are
 * typed as {@code Comparable<String>}.
 */
public class Subquery implements Comparable<String> {
    public Comparable<String> leftTerm = null;
    public Comparable<String> rightTerm = null;
    public String operator;
    // Postfix notation of this sub-query; operands of "+" and "|" are put in alphabetical order.
    public String orderedQuery;

    /**
     * Construct a sub-query formed only by one element.
     *
     * @param leftTerm is the term in the sub-query.
     */
    public Subquery(Comparable<String> leftTerm) {
        this.leftTerm = leftTerm;
        this.orderedQuery = leftTerm.toString();
    }

    /**
     * Construct a sub-query formed by a triplet of elements. The ordered notation is
     * initialised here as well, so {@code orderedQuery} is never left {@code null}.
     *
     * @param leftTerm  is the left-hand term in the sub-query
     * @param operator  is the element that connects two terms of the sub-query
     * @param rightTerm is the right-hand term in the sub-query
     */
    public Subquery(Comparable<String> leftTerm, String operator, Comparable<String> rightTerm) {
        this.leftTerm = leftTerm;
        this.operator = operator;
        this.rightTerm = rightTerm;
        this.orderedQuery = orderedNotation(leftTerm, operator, rightTerm);
    }

    // Build the postfix notation "left right operator", sorting the operands of "+" and "|".
    private static String orderedNotation(Comparable<String> left, String operator, Comparable<String> right) {
        String leftBare = notationOf(left);
        if (right == null) {
            // Without a right-hand term the sub-query behaves like a single term (see toString).
            return leftBare;
        }

        String rightBare = notationOf(right);
        boolean commutative = "|".equals(operator) || "+".equals(operator);
        if (commutative && leftBare.compareTo(rightBare) > 0) {
            return rightBare + " " + leftBare + " " + operator;
        }
        return leftBare + " " + rightBare + " " + operator;
    }

    // Ordered notation of a single term: a nested sub-query contributes its own orderedQuery.
    private static String notationOf(Comparable<String> term) {
        if (term instanceof Subquery) {
            String nested = ((Subquery) term).orderedQuery;
            if (nested != null) {
                return nested;
            }
        }
        return String.valueOf(term);
    }

    /**
     * Generate a string with the infix notation of the sub-query.
     *
     * @return infix sub-query
     */
    @Override
    public String toString() {
        if (rightTerm == null) return leftTerm.toString();

        return "(" + leftTerm.toString() + " " + operator + " " + rightTerm.toString() + ")";
    }

    /**
     * Compare the infix notation of this sub-query with a string.
     *
     * @param o is the string to compare to
     * @return the result of {@code String.compareTo}: negative, zero or positive
     */
    public int compareTo(String o) {
        return this.toString().compareTo(o);
    }
}
59 |
--------------------------------------------------------------------------------
/src/main/java/ResultDocument.java:
--------------------------------------------------------------------------------
1 | /**
2 | * ResultDocument.java
3 | *
4 | * Created by S. Stefani on 2017-01-06.
5 | */
6 |
7 | import se.kth.id1020.util.Document;
8 |
9 | import java.util.Comparator;
10 | import java.util.HashMap;
11 |
12 | public class ResultDocument implements Comparable {
13 | private Document document;
14 | private int hits;
15 | private int popularity;
16 | private double relevance;
17 |
18 | public ResultDocument(Document document, int hits) {
19 | this.document = document;
20 | this.hits = hits;
21 | this.popularity = document.popularity;
22 | }
23 |
24 | public ResultDocument(Document document, double relevance) {
25 | this.relevance = relevance;
26 | this.document = document;
27 | this.popularity = document.popularity;
28 | }
29 |
30 | /**
31 | * Compute the relevance of posting respect to the executed query by means
32 | * of tf-idf.
33 | *
34 | * @param documentsLengths contains the lengths of all the documents
35 | * @param relevantDocs is the number of relevant docs for the query
36 | */
37 | public void computeRelevance(HashMap documentsLengths, int relevantDocs) {
38 | relevance = tf(documentsLengths.get(document.name)) * idf(documentsLengths.size(), relevantDocs);
39 | }
40 |
41 | // Compute tf
42 | private double tf(int totalTerms) {
43 | return (double) this.hits / totalTerms;
44 | }
45 |
46 | // Compute idf
47 | private double idf(int totalDocs, int relevantDocs) {
48 | return Math.log10((double) totalDocs / (double) relevantDocs);
49 | }
50 |
51 | // Increment the number of hits
52 | public void updatePosting() {
53 | this.hits++;
54 | }
55 |
56 | public Document getDocument() {
57 | return document;
58 | }
59 |
60 | public int getHits() {
61 | return hits;
62 | }
63 |
64 | public int getPopularity() {
65 | return popularity;
66 | }
67 |
68 | public double getRelevance() {
69 | return relevance;
70 | }
71 |
72 | @Override
73 | public boolean equals(Object o) {
74 | if (this == o) return true;
75 | if (o == null || getClass() != o.getClass()) return false;
76 |
77 | ResultDocument that = (ResultDocument) o;
78 |
79 | return this.document.name.equals(that.document.name);
80 | }
81 |
82 | public int compareTo(ResultDocument o) {
83 | return this.document.name.compareTo(o.document.name);
84 | }
85 |
86 | public static class PopularityComparator implements Comparator {
87 | private int direction;
88 |
89 | public PopularityComparator(int direction) {
90 | this.direction = direction;
91 | }
92 |
93 | public int compare(ResultDocument o1, ResultDocument o2) {
94 | if (o1.getPopularity() < o2.getPopularity()) return -1 * direction;
95 | if (o1.getPopularity() > o2.getPopularity()) return direction;
96 | return 0;
97 | }
98 | }
99 |
100 | public static class RelevanceComparator implements Comparator {
101 | private int direction;
102 |
103 | public RelevanceComparator(int direction) {
104 | this.direction = direction;
105 | }
106 |
107 | public int compare(ResultDocument o1, ResultDocument o2) {
108 | if (o1.getRelevance() < o2.getRelevance()) return -1 * direction;
109 | if (o1.getRelevance() > o2.getRelevance()) return direction;
110 | return 0;
111 | }
112 | }
113 | }
--------------------------------------------------------------------------------
/src/main/java/Query.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Query.java
3 | *
4 | * Created by S. Stefani on 2016-12-02.
5 | */
6 |
7 | import edu.princeton.cs.algs4.Stack;
8 |
9 | public class Query {
10 | private Subquery parsedQuery;
11 | private String property;
12 | private int direction;
13 |
14 | /**
15 | * Receives the user input and run a parser on it.
16 | *
17 | * @param queryString the user input query string
18 | */
19 | public Query(String queryString) {
20 | parseQuery(queryString);
21 | }
22 |
23 | /**
24 | * Parse a query string separating the composing elements:
25 | * the parsed query (in form of sub-query), the sorting property and direction.
26 | *
27 | * @param str is the query to parse
28 | */
29 | private void parseQuery(String str) {
30 | String[] parts = str.split("orderby");
31 | String[] elements = parts[0].split("\\s+");
32 |
33 | // Use two-stack algorithm to parse prefix notation
34 | Stack> terms = new Stack>();
35 | Stack> helper = new Stack>();
36 |
37 | for (String el : elements) { terms.push(el); }
38 | while (!terms.isEmpty()) {
39 | Comparable term = terms.pop();
40 | String operands = "+|-";
41 | if (operands.contains(term.toString())) {
42 | Comparable leftSide = helper.pop();
43 | Comparable rightSide = helper.pop();
44 | helper.push(new Subquery(leftSide, term.toString(), rightSide));
45 | } else {
46 | helper.push(term);
47 | }
48 | }
49 |
50 | Comparable resultQuery = helper.pop();
51 | parsedQuery = resultQuery instanceof String ? new Subquery(resultQuery) : (Subquery) resultQuery;
52 | computeUniqueNotation(parsedQuery);
53 |
54 | if (parts.length < 2) {
55 | return;
56 | }
57 |
58 | // Parse sorting properties
59 | if (parts[1].contains("relevance")) {
60 | property = "RELEVANCE";
61 | } else if (parts[1].contains("popularity")) {
62 | property = "POPULARITY";
63 | }
64 |
65 | if (parts[1].contains("asc")) {
66 | direction = 1;
67 | } else if (parts[1].contains("desc")) {
68 | direction = -1;
69 | }
70 | }
71 |
72 | /**
73 | * Analyse a query (sub-query object) and generates a unique notation for each
74 | * of the composing elements. This notation ensure that if two terms are connected
75 | * by a commutative operator, they are also ordered alphabetically in the sub-query.
76 | * This is important to allow the caching system to work with commutative queries.
77 | *
78 | * @param parsedQuery is the parsed version of the user query
79 | * @return an ordered string version of the query
80 | */
81 | private String computeUniqueNotation(Subquery parsedQuery) {
82 | if (parsedQuery.rightTerm == null) {
83 | parsedQuery.orderedQuery = parsedQuery.leftTerm.toString();
84 | return parsedQuery.leftTerm.toString();
85 | }
86 |
87 | String leftBare = computeUniqueNotation(parsedQuery.leftTerm instanceof Subquery ? (Subquery) parsedQuery.leftTerm : new Subquery(parsedQuery.leftTerm));
88 | String rightBare = computeUniqueNotation(parsedQuery.rightTerm instanceof Subquery ? (Subquery) parsedQuery.rightTerm : new Subquery(parsedQuery.rightTerm));
89 |
90 | String operator = parsedQuery.operator;
91 | String ordered;
92 |
93 | if (operator.equals("|") || operator.equals("+")) {
94 | if (leftBare.compareTo(rightBare) > 0) {
95 | ordered = rightBare + " " + leftBare + " " + operator;
96 | } else {
97 | ordered = leftBare + " " + rightBare + " " + operator;
98 | }
99 | } else {
100 | ordered = leftBare + " " + rightBare + " " + operator;
101 | }
102 | parsedQuery.orderedQuery = ordered;
103 | return ordered;
104 | }
105 |
106 | public Subquery getParsedQuery() { return parsedQuery; }
107 |
108 | public String getProperty() { return property; }
109 |
110 | public int getDirection() { return direction; }
111 | }
112 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.gitignore.io/api/java,maven,intellij+all
2 | # Edit at https://www.gitignore.io/?templates=java,maven,intellij+all
3 |
4 | ### Intellij+all ###
5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
7 |
8 | # User-specific stuff
9 | .idea/**/workspace.xml
10 | .idea/**/tasks.xml
11 | .idea/**/usage.statistics.xml
12 | .idea/**/dictionaries
13 | .idea/**/shelf
14 |
15 | # Generated files
16 | .idea/**/contentModel.xml
17 |
18 | # Sensitive or high-churn files
19 | .idea/**/dataSources/
20 | .idea/**/dataSources.ids
21 | .idea/**/dataSources.local.xml
22 | .idea/**/sqlDataSources.xml
23 | .idea/**/dynamic.xml
24 | .idea/**/uiDesigner.xml
25 | .idea/**/dbnavigator.xml
26 |
27 | # Gradle
28 | .idea/**/gradle.xml
29 | .idea/**/libraries
30 |
31 | # Gradle and Maven with auto-import
32 | # When using Gradle or Maven with auto-import, you should exclude module files,
33 | # since they will be recreated, and may cause churn. Uncomment if using
34 | # auto-import.
35 | # .idea/modules.xml
36 | # .idea/*.iml
37 | # .idea/modules
38 | # *.iml
39 | # *.ipr
40 |
41 | # CMake
42 | cmake-build-*/
43 |
44 | # Mongo Explorer plugin
45 | .idea/**/mongoSettings.xml
46 |
47 | # File-based project format
48 | *.iws
49 |
50 | # IntelliJ
51 | out/
52 |
53 | # mpeltonen/sbt-idea plugin
54 | .idea_modules/
55 |
56 | # JIRA plugin
57 | atlassian-ide-plugin.xml
58 |
59 | # Cursive Clojure plugin
60 | .idea/replstate.xml
61 |
62 | # Crashlytics plugin (for Android Studio and IntelliJ)
63 | com_crashlytics_export_strings.xml
64 | crashlytics.properties
65 | crashlytics-build.properties
66 | fabric.properties
67 |
68 | # Editor-based Rest Client
69 | .idea/httpRequests
70 |
71 | # Android studio 3.1+ serialized cache file
72 | .idea/caches/build_file_checksums.ser
73 |
74 | ### Intellij+all Patch ###
75 | # Ignores the whole .idea folder and all .iml files
76 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360
77 |
78 | .idea/
79 |
80 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
81 |
82 | *.iml
83 | modules.xml
84 | .idea/misc.xml
85 | *.ipr
86 |
87 | # Sonarlint plugin
88 | .idea/sonarlint
89 |
90 | ### Java ###
91 | # Compiled class file
92 | *.class
93 |
94 | # Log file
95 | *.log
96 |
97 | # BlueJ files
98 | *.ctxt
99 |
100 | # Mobile Tools for Java (J2ME)
101 | .mtj.tmp/
102 |
103 | # Package Files #
104 | *.jar
105 | *.war
106 | *.nar
107 | *.ear
108 | *.zip
109 | *.tar.gz
110 | *.rar
111 |
112 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
113 | hs_err_pid*
114 |
115 | ### Maven ###
116 | target/
117 | pom.xml.tag
118 | pom.xml.releaseBackup
119 | pom.xml.versionsBackup
120 | pom.xml.next
121 | release.properties
122 | dependency-reduced-pom.xml
123 | buildNumber.properties
124 | .mvn/timing.properties
125 | .mvn/wrapper/maven-wrapper.jar
126 |
127 | # End of https://www.gitignore.io/api/java,maven,intellij+all
128 |
129 |
130 | # Created by https://www.gitignore.io/api/eclipse
131 | # Edit at https://www.gitignore.io/?templates=eclipse
132 |
133 | ### Eclipse ###
134 | .metadata
135 | bin/
136 | tmp/
137 | *.tmp
138 | *.bak
139 | *.swp
140 | *~.nib
141 | local.properties
142 | .settings/
143 | .loadpath
144 | .recommenders
145 |
146 | # External tool builders
147 | .externalToolBuilders/
148 |
149 | # Locally stored "Eclipse launch configurations"
150 | *.launch
151 |
152 | # PyDev specific (Python IDE for Eclipse)
153 | *.pydevproject
154 |
155 | # CDT-specific (C/C++ Development Tooling)
156 | .cproject
157 |
158 | # CDT- autotools
159 | .autotools
160 |
161 | # Java annotation processor (APT)
162 | .factorypath
163 |
164 | # PDT-specific (PHP Development Tools)
165 | .buildpath
166 |
167 | # sbteclipse plugin
168 | .target
169 |
170 | # Tern plugin
171 | .tern-project
172 |
173 | # TeXlipse plugin
174 | .texlipse
175 |
176 | # STS (Spring Tool Suite)
177 | .springBeans
178 |
179 | # Code Recommenders
180 | .recommenders/
181 |
182 | # Annotation Processing
183 | .apt_generated/
184 |
185 | # Scala IDE specific (Scala & Java development for Eclipse)
186 | .cache-main
187 | .scala_dependencies
188 | .worksheet
189 |
190 | ### Eclipse Patch ###
191 | # Eclipse Core
192 | .project
193 |
194 | # JDT-specific (Eclipse Java Development Tools)
195 | .classpath
196 |
197 | # Annotation Processing
198 | .apt_generated
199 |
200 | .sts4-cache/
201 |
202 | # End of https://www.gitignore.io/api/eclipse
--------------------------------------------------------------------------------
/src/main/java/TinySearchEngine.java:
--------------------------------------------------------------------------------
1 | /**
2 | * TinySearchEngine.java
3 | *
4 | * Created by S. Stefani on 2016-11-24.
5 | */
6 |
7 | import se.kth.id1020.TinySearchEngineBase;
8 | import se.kth.id1020.util.Attributes;
9 | import se.kth.id1020.util.Document;
10 | import se.kth.id1020.util.Sentence;
11 | import se.kth.id1020.util.Word;
12 | import java.util.*;
13 |
14 | public class TinySearchEngine implements TinySearchEngineBase {
15 | private HashMap> index;
16 | private HashMap documentsLengths;
17 | private HashMap> cache;
18 |
19 |
20 | public TinySearchEngine() {
21 | this.index = new HashMap>();
22 | this.documentsLengths = new HashMap();
23 | this.cache = new HashMap>();
24 | }
25 |
26 | public void preInserts() {
27 | System.out.println("Executing pre-insert...");
28 | }
29 |
30 | /**
31 | * Insert all the words of a sentence in the index.
32 | *
33 | * @param sentence is the current sentence
34 | * @param attributes contain the parent document of the sentence
35 | */
36 | public void insert(Sentence sentence, Attributes attributes) {
37 | for (Word word : sentence.getWords()) {
38 | // Add word to index if not in
39 | if (!index.containsKey(word.word)) {
40 | index.put(word.word, new ArrayList());
41 | }
42 |
43 | // Create new posting
44 | ArrayList postingList = index.get(word.word);
45 | ResultDocument newPosting = new ResultDocument(attributes.document, 1);
46 |
47 | int ind = Collections.binarySearch(postingList, newPosting);
48 |
49 | // Update posting if existent or add
50 | if (ind < 0) {
51 | postingList.add(-ind-1, newPosting);
52 | } else {
53 | postingList.get(ind).updatePosting();
54 | }
55 | }
56 |
57 | // Compute and store lengths of documents
58 | Integer sentenceLength = sentence.getWords().size();
59 | if (documentsLengths.containsKey(attributes.document.name)) {
60 | sentenceLength += documentsLengths.get(attributes.document.name);
61 | }
62 | documentsLengths.put(attributes.document.name, sentenceLength);
63 | }
64 |
65 | public void postInserts() {
66 | System.out.println("Executing post-insert...");
67 | }
68 |
69 | /**
70 | * Parse a user query and search for all the elements that satisfy such query.
71 | * Order the results according to the user input.
72 | *
73 | * @param s is the input query string
74 | * @return the list of docs that satisfy the query
75 | */
76 | public List search(String s) {
77 | // Parse query
78 | Query query = new Query(s);
79 |
80 | // Compute Array of result
81 | ArrayList result = runQuery(query.getParsedQuery());
82 | if (result == null) { return null; }
83 |
84 | // If sorting is specified use comparator to sort
85 | if (query.getProperty() != null && query.getProperty().equals("POPULARITY")) {
86 | Collections.sort(result, new ResultDocument.PopularityComparator(query.getDirection()));
87 | } else if (query.getProperty() != null && query.getProperty().equals("RELEVANCE")) {
88 | Collections.sort(result, new ResultDocument.RelevanceComparator(query.getDirection()));
89 | }
90 |
91 | // Convert into list of documents
92 | List documentList = new LinkedList();
93 | for (ResultDocument rd : result) { documentList.add(rd.getDocument()); }
94 |
95 | return documentList;
96 | }
97 |
98 | /**
99 | * Recursively analyse the query and compute the results considering the query operators.
100 | *
101 | * @param subQ is the sub-query object (result of the query parsing)
102 | * @return an array list of documents
103 | */
104 | private ArrayList runQuery(Subquery subQ) {
105 | if (subQ.rightTerm == null) {
106 |
107 | if (!index.containsKey(subQ.leftTerm)) return new ArrayList();
108 | ArrayList list = new ArrayList();
109 | for (ResultDocument value : index.get(subQ.leftTerm)) {
110 | ResultDocument newRD = new ResultDocument(value.getDocument(), value.getHits());
111 | newRD.computeRelevance(documentsLengths, index.get(subQ.leftTerm).size());
112 | list.add(newRD);
113 | }
114 |
115 | return list;
116 | }
117 |
118 | // Check if the query is cached
119 | if (cache.containsKey(subQ.orderedQuery)) {
120 | // System.out.println("Cache hit: " + subQ.toString());
121 | return cache.get(subQ.orderedQuery);
122 | }
123 |
124 | ArrayList leftResult = runQuery(subQ.leftTerm instanceof Subquery ? (Subquery) subQ.leftTerm : new Subquery(subQ.leftTerm));
125 | ArrayList rightResult = runQuery(subQ.rightTerm instanceof Subquery ? (Subquery) subQ.rightTerm : new Subquery(subQ.rightTerm));
126 | String operator = subQ.operator;
127 |
128 | // Run query operations (union, intersection, difference)
129 | ArrayList result;
130 | if (operator.equals("+")) {
131 | result = resultIntersection(leftResult, rightResult);
132 | } else if (operator.equals("|")) {
133 | result = resultUnion(leftResult, rightResult);
134 | } else {
135 | result = resultDifference(leftResult, rightResult);
136 | }
137 |
138 | // Cache the result
139 | cache.put(subQ.orderedQuery, result);
140 | // System.out.println("Add to cache: " + subQ.toString());
141 |
142 | return result;
143 | }
144 |
145 | // Compute intersection of two queries
146 | private ArrayList resultIntersection(ArrayList l, ArrayList r) {
147 | ArrayList result = new ArrayList();
148 | for (ResultDocument rd : l) {
149 | int ind = Collections.binarySearch(r, rd);
150 | if (ind >= 0) {
151 | result.add(merge(rd, r.get(ind)));
152 | }
153 | }
154 |
155 | return result;
156 | }
157 |
158 | // Compute union of two queries
159 | private ArrayList resultUnion(ArrayList l, ArrayList r) {
160 | ArrayList result = new ArrayList();
161 | result.addAll(l);
162 | for (ResultDocument rd : r) {
163 | int ind = Collections.binarySearch(result, rd);
164 | if (ind >= 0) {
165 | result.set(ind, merge(result.get(ind), rd));
166 | } else {
167 | result.add(-ind-1, rd);
168 | }
169 | }
170 |
171 | return result;
172 | }
173 |
174 | // Compute difference of two queries
175 | private ArrayList resultDifference(ArrayList l, ArrayList r) {
176 | ArrayList result = new ArrayList();
177 |
178 | for (ResultDocument rd : l) {
179 | if (!r.contains(rd)) { result.add(rd); }
180 | }
181 |
182 | return result;
183 | }
184 |
185 | private ResultDocument merge(ResultDocument u, ResultDocument v) {
186 | return new ResultDocument(u.getDocument(), u.getRelevance() + v.getRelevance());
187 | }
188 |
189 | /**
190 | * Output the infix version of the query string (useful to check correctness of parser)
191 | *
192 | * @param s is the user query string
193 | * @return the infix version of query
194 | */
195 | public String infix(String s) {
196 | Query query = new Query(s);
197 | String dir = query.getDirection() == 1 ? "asc" : "desc";
198 | return query.getParsedQuery().toString() + " orderby " + query.getProperty().toLowerCase() + " " + dir;
199 | }
200 | }
--------------------------------------------------------------------------------