├── .gitignore ├── LICENSE ├── README.md ├── project.clj ├── src └── clojure_lucene_demo │ └── core.clj └── test └── clojure_lucene_demo └── test └── core.clj /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | lib/ 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Clojure Lucene Demo 2 | =================== 3 | 4 | The original reason for writing this code was to create a quick 5 | proof-of-concept demonstrating a problem I was having with filters in Lucene. 6 | When I added a filter my query returned zero results. As [Uwe Schindler][uwe] 7 | pointed out this happened because my code told Lucene to analyze the field, 8 | while filters expect a literal value unless you customize them with an 9 | analyzer on the query side. Thanks, Uwe! 10 | 11 | Now I took the time to write this abstract example of how I use Lucene with 12 | Clojure, I'm planning to keep it up as reference for others. That way, the 13 | code will serve some purpose now the problem is solved. Of course, any errors 14 | or non-idiomatic usage should be fixed by that point. 
Keep in mind that this 15 | is the first Clojure code I'm releasing, only two weeks after first picking up 16 | the language, so there might be a few non-idiomatic gotchas lurking around! 17 | 18 | Running the code is easy (assuming you have [Leiningen][lein] installed): 19 | 20 | git clone https://github.com/fmw/clojure-lucene-demo.git 21 | cd clojure-lucene-demo 22 | lein test 23 | 24 | Check [Mark Triggs' Mailindex repository][mailindex] for another example of 25 | Lucene code in Clojure. Also, make sure to look at the [unit 26 | tests][test-search], because they provide the best documentation for the code. 27 | Feel free to contact me using the email address below in case you have any 28 | questions or suggestions. 29 | 30 | [uwe]: http://www.thetaphi.de/ 31 | 32 | [test-search]: https://github.com/fmw/clojure-lucene-demo/blob/master/test/clojure_lucene_demo/test/core.clj 33 | 34 | [lein]: https://github.com/technomancy/leiningen 35 | 36 | [mailindex]: https://github.com/marktriggs/mailindex 37 | 38 | Copyright 2011, F.M. (Filip) de Waard. 39 | Distributed under the Apache License, version 2 (see the LICENSE file). 
40 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject clojure-lucene-demo "1.0.0-SNAPSHOT" 2 | :description "Demo implementation of Lucene code in Clojure" 3 | :license {:name "Apache License, version 2."} 4 | :dependencies [[org.clojure/clojure "1.2.0"] 5 | [org.clojure/clojure-contrib "1.2.0"] 6 | [clojure-couchdb "0.4.5"] 7 | [org.apache.lucene/lucene-core "3.0.3"] 8 | [org.apache.lucene/lucene-queries "3.0.3"]] 9 | :dev-dependencies [[org.clojars.autre/lein-vimclojure "1.0.0"]] 10 | :main clojure-lucene-demo.core) 11 | 12 | -------------------------------------------------------------------------------- /src/clojure_lucene_demo/core.clj: -------------------------------------------------------------------------------- 1 | ; src/clojure_lucene_demo/core.clj: Demonstrating Lucene API using Clojure 2 | ; 3 | ; Copyright 2011, F.M. (Filip) de Waard . 4 | ; 5 | ; Licensed under the Apache License, Version 2.0 (the "License"); 6 | ; you may not use this file except in compliance with the License. 7 | ; You may obtain a copy of the License at 8 | ; 9 | ; http://www.apache.org/licenses/LICENSE-2.0 10 | ; 11 | ; Unless required by applicable law or agreed to in writing, software 12 | ; distributed under the License is distributed on an "AS IS" BASIS, 13 | ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | ; See the License for the specific language governing permissions and 15 | ; limitations under the License. 
(ns clojure-lucene-demo.core
  (:use [clojure.string :only (join lower-case)])
  (:import (org.apache.lucene.document
             Document Field Field$Store Field$Index NumericField)
           (org.apache.lucene.analysis.standard StandardAnalyzer)
           ; Directory is imported so the type hint on create-index-reader
           ; refers to a class this namespace actually knows about.
           (org.apache.lucene.store Directory NIOFSDirectory RAMDirectory)
           (org.apache.lucene.search
             IndexSearcher QueryWrapperFilter TermQuery Sort)
           (org.apache.lucene.queryParser QueryParser)
           (org.apache.lucene.index IndexWriter IndexWriter$MaxFieldLength
                                    IndexReader Term)
           (org.apache.lucene.util Version)
           (java.io File)))

(defn create-analyzer
  "Returns a new StandardAnalyzer constructed with Version/LUCENE_30."
  []
  (StandardAnalyzer. Version/LUCENE_30))

(defn create-directory
  "Returns a Lucene directory for path. The keyword :RAM yields a new
  RAMDirectory; any other value is handed to java.io.File and wrapped
  in an NIOFSDirectory."
  [path]
  (if (= :RAM path)
    (RAMDirectory.)
    (NIOFSDirectory. (File. path))))

(defn create-index-reader
  "Opens an IndexReader on directory via IndexReader/open. The caller
  owns the returned reader; nothing in this namespace closes it."
  [^Directory directory]
  (IndexReader/open directory))

(defn ^Field create-field
  "Creates a new Lucene Field object named field-name holding (str value).

  Without options the field is created as :stored :analyzed. Otherwise
  the options select the store and index settings:

    :stored      store the value (Field$Store/YES), else Field$Store/NO
    :analyzed    index with Field$Index/ANALYZED
    :dont-index  Field$Index/NO (only consulted when :analyzed is absent)

  When neither :analyzed nor :dont-index is given the field is indexed
  with Field$Index/NOT_ANALYZED."
  ([field-name value]
     (create-field field-name value :stored :analyzed))
  ([field-name value & options]
     (let [option? (set options)
           store   (if (option? :stored) Field$Store/YES Field$Store/NO)
           index   (cond
                     (option? :analyzed)   Field$Index/ANALYZED
                     (option? :dont-index) Field$Index/NO
                     :else                 Field$Index/NOT_ANALYZED)]
       (Field. field-name (str value) store index))))

(defn create-document
  "Creates a new Lucene Document object from item, a map with the keys
  :title, :description and :category."
  [item]
  (let [{:keys [title description category]} item]
    (doto (Document.)
      ; index a fulltext field with all the values to search on
      ; mashed together in a single value, but there is no need
      ; to store this field.
      (.add (create-field "fulltext" (join " " [title description]) :analyzed))

      ; this field is included to run filters on. It is indexed,
      ; but not analyzed or tokenized (i.e. you need to use
      ; literal values in the filter or it won't match).
      (.add (create-field "category" category :stored))

      ; these fields are just stored in order to be able to display
      ; them in the search results without loading the document
      ; from a database
      (.add (create-field "title" title :stored :dont-index))
      (.add (create-field "description" description :stored :dont-index)))))

(defn write-index!
  "Writes one Lucene document per element of items (see create-document)
  to directory, optimizes the index and closes the writer.

  The writer is closed in a finally clause, so it is also closed when
  building or adding a document throws. Returns the (closed)
  IndexWriter, as the original implementation did."
  [directory items]
  (let [writer (IndexWriter. directory
                             (create-analyzer)
                             IndexWriter$MaxFieldLength/UNLIMITED)]
    (try
      (.setRAMBufferSizeMB writer 64) ;maybe .setUseCompoundFile false?
      (doseq [item items]
        (.addDocument writer (create-document item)))
      (.optimize writer)
      writer
      (finally
        (.close writer)))))

(defn get-doc
  "Returns the document with id doc-id from reader."
  [reader doc-id]
  (.document reader doc-id))

(defn get-docs
  "Returns a lazy seq with the document for every element of docs,
  using the doc field of each element (e.g. a ScoreDoc) as document id.
  Because the seq is lazy, realize it while reader is still open."
  [reader docs]
  (map (fn [score-doc] (get-doc reader (.doc score-doc))) docs))

(defn create-filter
  "Creates a QueryWrapperFilter matching the literal, lower-cased value
  of (:category filters) in the \"category\" field. Returns nil when
  filters has no :category.

  NOTE(review): create-document indexes the category verbatim, while
  the filter value is lower-cased here, so a document whose category
  contains upper-case characters cannot be matched by this filter."
  [filters]
  ; I want to use a BooleanQuery here, wrapping the category
  ; as well as some NumericRangeQuery objects on fields that
  ; aren't in the dummy data set, but lets start simple for
  ; this little proof-of-concept.

  ; Originally, I used a normal filter (wrapped in a ChainedFilter)
  ; but due to the fact that ChainedFilter isn't in lucene-core
  ; I decided to go for a QueryWrapperFilter instead.
  (when-let [category (:category filters)]
    (QueryWrapperFilter.
      (TermQuery. (Term. "category" (lower-case category))))))

(defn search
  "Parses query against the \"fulltext\" field using analyzer and runs
  it on reader, returning at most limit hits. When query-filter is not
  nil it is applied to the search.

  Returns a map with :total-hits (the totalHits of the result) and
  :docs (its scoreDocs, usable with get-docs).

  The IndexSearcher created here is closed in a finally clause, so it
  is also closed when parsing or searching throws. reader is left for
  the caller to close."
  [query query-filter limit reader analyzer]
  (let [searcher (IndexSearcher. reader)]
    (try
      (let [parser   (QueryParser. Version/LUCENE_30 "fulltext" analyzer)
            q        (.parse parser query)
            top-docs (if (nil? query-filter)
                       (.search searcher q limit)
                       (.search searcher q query-filter limit (Sort.)))]
        {:total-hits (.totalHits top-docs)
         :docs (.scoreDocs top-docs)})
      (finally
        (.close searcher)))))

; --------------------------------------------------------------------------------
; /test/clojure_lucene_demo/test/core.clj:
; --------------------------------------------------------------------------------
; test/clojure_lucene_demo/test/core.clj: Tests for Lucene code.
;
; Copyright 2011, F.M. (Filip) de Waard .
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
; http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

(ns clojure-lucene-demo.test.core
  (:use [clojure-lucene-demo.core] :reload)
  (:use [clojure.test])
  (:use [clojure.contrib.reflect :only (get-field)])
  (:import (org.apache.lucene.search ScoreDoc)))

(def dummy-docs
  [{:title "Hacker News"
    :description "A community-driven news and discussion site."
    :category "technology"}
   {:title "Planet Clojure"
    :description "Aggregates Clojure-related weblog posts."
    :category "clojure"}
   {:title "Planet Python"
    :description "Aggregates Python-related weblog posts."
    :category "python"}
   {:title "Planet Java.org"
    :description "Aggregates Java-related weblog posts."
    :category "java"}
   {:title "NOS.nl"
    :description "News from the Dutch national broadcaster."
    :category "general"}])

(defn- check-field
  "Asserts the name, string value and stored/indexed/tokenized flags
  of a Lucene Field."
  [field field-name value stored? indexed? tokenized?]
  (is (= (.name field) field-name))
  (is (= (.stringValue field) value))
  (is (= (.isStored field) stored?))
  (is (= (.isIndexed field) indexed?))
  (is (= (.isTokenized field) tokenized?)))

(defn- check-stored-doc
  "Asserts that the stored title, description and category of the
  Lucene document doc equal those of the expected item map."
  [doc {:keys [title description category]}]
  (is (= (.get doc "title") title))
  (is (= (.get doc "description") description))
  (is (= (.get doc "category") category)))

(defn- filter-term
  "Returns the Term object of the query held in the
  QueryWrapperFilter's private \"query\" field."
  [flt]
  (.getTerm (get-field org.apache.lucene.search.QueryWrapperFilter
                       "query"
                       flt)))

(deftest test-create-analyzer
  (testing "test if Lucene analyzers are created correctly."
    (is (= (class (create-analyzer))
           org.apache.lucene.analysis.standard.StandardAnalyzer))))

(deftest test-create-directory
  (testing "test if Lucene directories are created correctly."
    (is (= (class (create-directory :RAM))
           org.apache.lucene.store.RAMDirectory))
    (let [directory (create-directory "/tmp/test")
          path (str (.getFile directory))]
      (is (= (class directory) org.apache.lucene.store.NIOFSDirectory))
      (is (or (= path "/tmp/test")
              (= path "/private/tmp/test"))))))

(deftest test-create-index-reader
  (testing "test if Lucene IndexReaders are created correctly."
    (let [dir (create-directory :RAM)]
      ; write to index to avoid no segments file error
      (write-index! dir dummy-docs)
      (with-open [reader (create-index-reader dir)]
        ; checked against the public IndexReader type rather than the
        ; concrete reader class Lucene happens to return
        (is (instance? org.apache.lucene.index.IndexReader reader))))))

(deftest test-create-field
  (testing "test if create-field properly creates a Lucene Field"
    ; each row: options passed to create-field, followed by the
    ; expected stored / indexed / tokenized flags
    (doseq [[options stored? indexed? tokenized?]
            [[[]                    true  true  true]
             [[:stored :analyzed]   true  true  true]
             [[:stored]             true  true  false]
             [[:stored :dont-index] true  false false]
             [[:analyzed]           false true  true]]]
      (check-field (apply create-field "foo" "bar" options)
                   "foo" "bar" stored? indexed? tokenized?))))

(deftest test-create-document
  (testing "Check if a document is correctly translated to a Lucene doc"
    (doseq [{:keys [title description category] :as item} dummy-docs]
      (let [document (create-document item)]
        (is (= (class document) org.apache.lucene.document.Document))

        (check-field (.getField document "fulltext")
                     "fulltext" (str title " " description) false true true)
        (check-field (.getField document "title")
                     "title" title true false false)
        (check-field (.getField document "description")
                     "description" description true false false)
        (check-field (.getField document "category")
                     "category" category true true false)))))

(deftest test-write-index!
  (testing "test indexing process."
    ; the contents of the index are verified in more detail by other
    ; tests (e.g. test-get-doc-and-get-docs); here only the number of
    ; written documents is checked
    (let [dir (create-directory :RAM)]
      (write-index! dir dummy-docs)
      (with-open [reader (create-index-reader dir)]
        (is (= (.numDocs reader) (count dummy-docs)))))))

(deftest test-get-doc-and-get-docs
  (testing "test document retrieval from reader."
    (let [dir (create-directory :RAM)]
      (write-index! dir dummy-docs)
      (with-open [reader (create-index-reader dir)]
        (let [first-doc (get-doc reader 0)
              score-docs (map #(ScoreDoc. (int %) (float 1.0)) (range 1 5))
              other-docs (get-docs reader score-docs)]

          (check-stored-doc first-doc (first dummy-docs))

          (is (= (count other-docs) 4))
          (doseq [[doc expected] (map vector other-docs (rest dummy-docs))]
            (check-stored-doc doc expected)))))))

(deftest test-create-filter
  (testing "test if filters are constructed properly."
    (is (nil? (create-filter {})))

    ; the second value checks if filters are correctly converted
    ; to lowercase
    (doseq [category ["technology" "TecHnOlOgY"]]
      (let [flt (create-filter {:category category})
            term (filter-term flt)]
        (is (= (class flt) org.apache.lucene.search.QueryWrapperFilter))
        (is (= (.field term) "category"))
        (is (= (.text term) "technology"))))))

(deftest test-search
  (testing "testing search."
    (let [dir (create-directory :RAM)
          analyzer (create-analyzer)]

      (write-index! dir dummy-docs)
      (with-open [reader (create-index-reader dir)]

        (let [result (search "planet" nil 5 reader analyzer)
              docs (get-docs reader (:docs result))]
          (is (= (:total-hits result) 3))
          (is (= (count docs) 3))

          ; expected hits, in order: Planet Clojure, Planet Python
          ; and Planet Java.org
          (doseq [[doc expected] (map vector docs (subvec dummy-docs 1 4))]
            (check-stored-doc doc expected)))

        (let [flt (create-filter {:category "clojure"})
              result (search "planet" flt 5 reader analyzer)
              document (first (get-docs reader (:docs result)))
              term (filter-term flt)]
          ; there is only one document with "clojure" as the value of
          ; the category field, so I expect to get a single result:
          (is (= (:total-hits result) 1))

          (check-stored-doc document (nth dummy-docs 1))

          ; this is redundant with the test for create-filter, but
          ; it never hurts to check again.
          (is (= (class flt) org.apache.lucene.search.QueryWrapperFilter))
          (is (= (.field term) "category"))
          (is (= (.text term) "clojure")))

        ; filters are case-sensitive, but create-filter
        ; function uses lower-case on the value, so this
        ; should work:
        (let [flt (create-filter {:category "Clojure"})
              result (search "planet" flt 5 reader analyzer)]
          (is (= (:total-hits result) 1)))))))