├── .gitignore ├── LICENSE ├── README.md ├── build.xml ├── facet.properties ├── misc ├── README.txt ├── queryExperiments.csv ├── userQuestions.csv ├── userStudy.csv └── userStudyRaw.csv ├── src └── cl │ └── uchile │ └── dcc │ └── facet │ ├── core │ ├── CacheBuilder.java │ ├── CacheHandler.java │ ├── DataFields.java │ ├── IndexData.java │ ├── IndexHandler.java │ ├── IndexInstances.java │ ├── IndexProperties.java │ ├── IndexValues.java │ ├── Indexer.java │ ├── InstancesFields.java │ ├── PropertiesFields.java │ ├── PropertiesStatistics.java │ ├── RankData.java │ ├── RankHandler.java │ ├── ScoreBoostsOperator.java │ ├── SearchData.java │ ├── SearchInstances.java │ ├── SearchProperties.java │ ├── SearchValues.java │ ├── UpdateBoosts.java │ └── ValuesFields.java │ ├── testing │ ├── DataEntry.java │ ├── PropertyEntry.java │ ├── RandomSearch.java │ ├── RandomWeightList.java │ ├── SearchStats.java │ └── SearchThread.java │ └── web │ ├── ApiServlet.java │ ├── CodeNameValue.java │ ├── DataServlet.java │ ├── Entry.java │ ├── InstancesServlet.java │ ├── PropertiesServlet.java │ └── SearchServlet.java ├── toolinfo.json └── web ├── WEB-INF └── web.xml ├── about.html ├── css ├── favicon.ico ├── logoBM.png └── styles.css ├── index.jsp ├── query.jsp ├── results.jsp └── toolinfo.json /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | out/ 4 | dist/ 5 | lib/ 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GraFa 2 | Faceted Browsing over Wikidata triples 3 | 4 | ## Use the system 5 | 6 | The system is currently running on: http://grafa.dcc.uchile.cl 7 | 8 | ## Installation 9 | 10 | ### Required libraries: 11 | 12 | * Lucene 6.5 13 | * Tomcat 7 14 | * RDF4J 2.2 15 | 16 | ### Config file 17 | 18 | The file facet.properties contains the following options: 19 | 20 | * languages: list of supported languages (using the same language tag from the dataset) 21 | * entityIRI: prefix for entities 22 | * propertyIRI: prefix for properties 23 | * labelIRI: predicate of the triple containing the label 24 | * descriptionIRI: predicate of the triple containing the description 25 | * alt_labelIRI: predicate of the triples containing alt labels or aliases 26 | * instanceOf: IRI (with no prefix) of the type or instance of predicate 27 | * image: IRI (with no prefix) of the image predicate 28 | * entityPrefix: prefix of all entities (excluding the domain prefix) 29 | 
30 | ### Binaries 31 | 32 | The included build.xml generates several jar files. These are the most important: 33 | 34 | * index.jar: Creates an index from all entities based on an NT file 35 | * rank.jar: Computes the graph and the PageRank value of each entity 36 | * boosts.jar: Copies the index from index.jar and adds the ranks from rank.jar 37 | * cache.jar: Generates a list of all queries with a large result set that need caching. 38 | * instances.jar: Creates an index of all instances/types of the main index and also caches the results from the list of cache.jar 39 | * values.jar: Creates the cache for values for every property of every query that needs caching. This process may take a couple of days. 40 | * grafa.war: Tomcat Webapp's war file. The config file needs the directory of the indexes. 41 | * The other jar files are for debugging and generating statistics (there may also be unused classes) 42 | -------------------------------------------------------------------------------- /build.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 
169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 331 | 332 | 333 | 334 | -------------------------------------------------------------------------------- /facet.properties: -------------------------------------------------------------------------------- 1 | languages = en,es 2 | 3 | entityIRI = http://www.wikidata.org/entity/ 4 | propertyIRI = http://www.wikidata.org/prop/direct/ 5 | 6 | labelIRI = http://www.w3.org/2000/01/rdf-schema#label 7 | descriptionIRI = http://schema.org/description 8 | alt_labelIRI = http://www.w3.org/2004/02/skos/core#altLabel 9 | 10 | instanceOf = P31 11 | image = P18 12 | 13 | entityPrefix = Q -------------------------------------------------------------------------------- /misc/README.txt: -------------------------------------------------------------------------------- 1 | This folder contains data for the paper "GraFa: Scalable Faceted Browsing for RDF Graphs using Selective Caching", under review at ISWC. 
2 | 3 | queryExperiments.csv describes the performance experiments run by emulating user sessions: 4 | - URL: indicates the URL of the query 5 | - time: response time in milliseconds 6 | - results: number of entities in the full result set 7 | - depth: how many facet selections have been performed 8 | - properties: how long the selection of a property value takes 9 | - size: the result size in bytes 10 | 11 | userStudy.csv describes the task-based user evaluation 12 | - ID: user id 13 | - Question: question/task ID (see userQuestions.csv in the same folder) 14 | - System: G: GraFa, W: Wikidata Query Service 15 | - Correct: Y: Yes, N: No, B: Blank (no response given) 16 | - Time: Time taken (ms) from question being displayed to response being given or question being skipped 17 | 18 | userStudyRaw.csv describes the responses given by users in the order received 19 | - User: user id 20 | - Key: [question id][A: Wikidata Query Service, B: GraFa][S: Skip, F: Answer submitted] 21 | - Response: value of answer submitted (URL of results page) 22 | - CurrentTime: UNIX time (in milliseconds) of response/skip 23 | 24 | userQuestions.csv: 25 | - ID: question ID 26 | - Text: question text 27 | -------------------------------------------------------------------------------- /misc/userQuestions.csv: -------------------------------------------------------------------------------- 1 | ID,Text 2 | 1,Plays 3 | 2,Lakes in Cameroon 4 | 3,Lighthouses in Norway 5 | 4,Popes 6 | 5,Women born in Wales 7 | 6,Papers about Wikidata 8 | 7,Law & Order episodes 9 | 8,Fictional characters from Marvel Universe 10 | 9,People dying by burning 11 | 10,Mosquito species 12 | -------------------------------------------------------------------------------- /misc/userStudy.csv: -------------------------------------------------------------------------------- 1 | ID,Question,System,Correct?,Time (ms) 2 | 0,2,G,B,401513 3 | 0,4,G,Y,135592 4 | 0,6,G,Y,47308 5 | 0,8,G,B,359224 6 | 0,10,G,Y,339901 7 | 1,1,G,Y,102621 8 | 
1,3,G,B,91147 9 | 1,5,G,B,106830 10 | 1,7,G,Y,139460 11 | 1,9,G,B,114053 12 | 2,2,G,Y,114138 13 | 2,4,G,N,63790 14 | 2,6,G,Y,85724 15 | 2,8,G,Y,86731 16 | 2,10,G,B,63647 17 | 3,1,G,Y,165617 18 | 3,3,G,N,258310 19 | 3,5,G,B,157543 20 | 3,7,G,B,125600 21 | 3,9,G,B,97950 22 | 4,2,G,Y,337575 23 | 4,4,G,Y,228607 24 | 4,6,G,Y,76340 25 | 4,8,G,B,113818 26 | 4,10,G,B,933 27 | 5,1,G,N,134842 28 | 5,3,G,N,260785 29 | 5,5,G,N,365343 30 | 5,7,G,N,199663 31 | 5,9,G,N,263415 32 | 6,2,G,Y,172518 33 | 6,4,G,B,272890 34 | 6,6,G,Y,106071 35 | 6,8,G,Y,243005 36 | 6,10,G,B,339881 37 | 8,2,G,N,137118 38 | 8,4,G,N,83949 39 | 8,6,G,N,69032 40 | 8,8,G,N,32944 41 | 8,10,G,N,282713 42 | 9,1,G,Y,200891 43 | 9,3,G,N,105043 44 | 9,5,G,B,248262 45 | 9,7,G,Y,39151 46 | 9,9,G,Y,154123 47 | 10,2,G,Y,143815 48 | 10,4,G,B,189640 49 | 10,6,G,Y,160450 50 | 10,8,G,B,340821 51 | 10,10,G,B,234091 52 | 11,1,G,Y,49217 53 | 11,3,G,N,159730 54 | 11,5,G,B,236993 55 | 11,7,G,Y,60019 56 | 11,9,G,B,247436 57 | 0,1,W,Y,178167 58 | 0,3,W,Y,117085 59 | 0,5,W,N,195752 60 | 0,7,W,Y,161530 61 | 0,9,W,Y,228351 62 | 1,2,W,Y,94567 63 | 1,4,W,Y,38736 64 | 1,6,W,B,146697 65 | 1,8,W,Y,84004 66 | 1,10,W,Y,158216 67 | 2,1,W,Y,70293 68 | 2,3,W,Y,110671 69 | 2,5,W,N,180858 70 | 2,7,W,B,107491 71 | 2,9,W,B,61608 72 | 3,2,W,B,371240 73 | 3,4,W,Y,192531 74 | 3,6,W,Y,134135 75 | 3,8,W,Y,71817 76 | 3,10,W,Y,24521 77 | 4,1,W,Y,120852 78 | 4,3,W,N,404710 79 | 4,5,W,Y,206219 80 | 4,7,W,Y,245255 81 | 4,9,W,B,353760 82 | 5,2,W,Y,188551 83 | 5,4,W,N,186000 84 | 5,6,W,Y,86124 85 | 5,8,W,Y,82985 86 | 5,10,W,N,193016 87 | 6,1,W,Y,103057 88 | 6,3,W,Y,46005 89 | 6,5,W,Y,186673 90 | 6,7,W,Y,145518 91 | 6,9,W,N,251342 92 | 8,1,W,Y,116548 93 | 8,3,W,B,108008 94 | 8,5,W,N,122564 95 | 8,7,W,N,48148 96 | 8,9,W,B,214863 97 | 9,2,W,Y,85987 98 | 9,4,W,N,29778 99 | 9,6,W,Y,54773 100 | 9,8,W,Y,78280 101 | 9,10,W,Y,140363 102 | 10,1,W,Y,187569 103 | 10,3,W,Y,49740 104 | 10,5,W,N,256042 105 | 10,7,W,N,150525 106 | 10,9,W,Y,145012 107 | 11,2,W,Y,151730 108 
| 11,4,W,Y,23576 109 | 11,6,W,Y,68146 110 | 11,8,W,Y,78436 111 | 11,10,W,Y,178097 112 | -------------------------------------------------------------------------------- /misc/userStudyRaw.csv: -------------------------------------------------------------------------------- 1 | User,Key,Response,CurrentTime 2 | 0,1AS,-,1511356851799 3 | 1,1BS,-,1511356855697 4 | 2,1AS,-,1511356855808 5 | 3,1BS,-,1511356864913 6 | 4,1AS,-,1511356866635 7 | 5,1BS,-,1511356882775 8 | 6,1AS,-,1511356892312 9 | 2,1AF,https://query.wikidata.org/#SELECT%20%3Fplay%20%3FplayLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fplay%20wdt%3AP31%20wd%3AQ25379.%0A%7D%0ALIMIT%20100,1511356926101 10 | 2,2AS,-,1511356930599 11 | 7,1BS,-,1511356942678 12 | 7,1BF,,1511356954729 13 | 1,1BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=&instance=Q25379,1511356958318 14 | 7,2BS,-,1511356958911 15 | 1,2BS,-,1511356969418 16 | 8,1AS,-,1511356971654 17 | 4,1AF,https://query.wikidata.org/#SELECT %3Fobra_de_teatro %3Fobra_de_teatroLabel WHERE {%0A SERVICE wikibase%3Alabel { bd%3AserviceParam wikibase%3Alanguage [AUTO_LANGUAGE]%2Cen\. 
}%0A %3Fobra_de_teatro wdt%3AP31 wd%3AQ25379.%0A}%0ALIMIT 100,1511356987487 18 | 4,2AS,-,1511356992483 19 | 6,1AF,https://query.wikidata.org/#SELECT%20%3Fobra_de_teatro%20%3Fobra_de_teatroLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fobra_de_teatro%20wdt%3AP31%20wd%3AQ25379.%0A%7D%0ALIMIT%20100,1511356995369 20 | 6,2AS,-,1511357003085 21 | 5,1BF,http://facet.dcc.uchile.cl/grafa/search?keyword=Obras+de+teatro&instance=,1511357017617 22 | 0,1AF,https://query.wikidata.org/#SELECT%20%3FO%0AWHERE%20%7B%0A%20%20%3FO%20wdt%3AP31%20wd%3AQ25379%20%20%0A%7D,1511357029966 23 | 3,1BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=&instance=Q25379,1511357030530 24 | 0,2AS,-,1511357032909 25 | 5,2BS,-,1511357033942 26 | 3,2BS,-,1511357034003 27 | 2,2AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q23397&properties=P17%23%23Q1009,1511357044737 28 | 2,3AS,-,1511357046425 29 | 1,2BF,https://query.wikidata.org/#SELECT%20%3Flago%20%3FlagoLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Flago%20wdt%3AP31%20wd%3AQ23397.%0A%20%20%3Flago%20wdt%3AP17%20wd%3AQ1009.%0A%7D%0ALIMIT%20100,1511357063985 30 | 1,3BS,-,1511357072979 31 | 8,1AF,http://tinyurl.com/yaqfwyro,1511357088202 32 | 9,1BS,-,1511357090537 33 | 8,2AS,-,1511357090686 34 | 2,3AF,https://query.wikidata.org/#SELECT%20%3Ffaro%20%3FfaroLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Ffaro%20wdt%3AP31%20wd%3AQ39715.%0A%20%20%3Ffaro%20wdt%3AP17%20wd%3AQ20.%0A%7D%0ALIMIT%20100,1511357157096 35 | 2,4AS,-,1511357158872 36 | 1,3BF,,1511357164126 37 | 1,4BS,-,1511357166066 38 | 6,2AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q23397&properties=P17%23%23Q1009,1511357175603 39 | 
6,3AS,-,1511357178251 40 | 1,4BF,https://query.wikidata.org/#SELECT%20%3Fpapa%20%3FpapaLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fpapa%20wdt%3AP39%20wd%3AQ19546.%0A%7D%0ALIMIT%20100,1511357204802 41 | 1,5BS,-,1511357206201 42 | 5,2BF,https://query.wikidata.org/#SELECT%20%3FCamer_n%20%3FCamer_nLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3FCamer_n%20wdt%3AP17%20wd%3AQ1009.%0A%20%20%3FCamer_n%20wdt%3AP31%20wd%3AQ23397.%0A%7D%0ALIMIT%20100,1511357222493 43 | 2,4AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=&instance=Q16587531,1511357222662 44 | 2,5AS,-,1511357223773 45 | 6,3AF,https://query.wikidata.org/#SELECT%20%3Ffaro%20%3FfaroLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Ffaro%20wdt%3AP31%20wd%3AQ39715.%0A%20%20%3Ffaro%20wdt%3AP17%20wd%3AQ20.%0A%7D%0ALIMIT%20100,1511357224256 46 | 5,3BS,-,1511357225599 47 | 8,2AF,https://www.wikidata.org/wiki/Q125309,1511357227804 48 | 6,4AS,-,1511357230852 49 | 8,3AS,-,1511357233854 50 | 9,1BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=&instance=Q25379,1511357291428 51 | 9,2BS,-,1511357300953 52 | 1,5BF,,1511357313031 53 | 1,6BS,-,1511357317539 54 | 4,2AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q23397&properties=P205%23%23Q1009&properties=&properties=&properties=&properties=,1511357330058 55 | 4,3AS,-,1511357333028 56 | 10,1AS,-,1511357337418 57 | 8,3AF,,1511357341862 58 | 8,4AS,-,1511357343425 59 | 
9,2BF,https://query.wikidata.org/#SELECT%20%3Flago%20%3FlagoLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Flago%20wdt%3AP31%20wd%3AQ23397.%0A%20%20%3Flago%20wdt%3AP17%20wd%3AQ1009.%0A%7D%0ALIMIT%20100,1511357386940 60 | 9,3BS,-,1511357391367 61 | 2,5AF,https://query.wikidata.org/#SELECT%20%3Fmujer%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fmujer%20wdt%3AP31%20wd%3AQ467.%0A%20%20%3Fmujer%20wdt%3AP19%20wd%3AQ20.%0A%20%20%0A%7D%0ALIMIT%20100,1511357404631 62 | 3,2BF,,1511357405243 63 | 2,6AS,-,1511357405643 64 | 3,3BS,-,1511357406148 65 | 8,4AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=papa&instance=,1511357427374 66 | 8,5AS,-,1511357428817 67 | 0,2AF,,1511357434422 68 | 0,3AS,-,1511357436172 69 | 1,6BF,,1511357464236 70 | 1,7BS,-,1511357467854 71 | 5,3BF,http://facet.dcc.uchile.cl/grafa/search?keyword=Faros+en+Noregua&instance=&properties=,1511357486384 72 | 2,6AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q13442814&properties=P921%23%23Q2013,1511357491367 73 | 5,4BS,-,1511357491650 74 | 2,7AS,-,1511357492603 75 | 9,3BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q843152&properties=P17%23%23Q20,1511357496410 76 | 9,4BS,-,1511357498784 77 | 6,4AF,,1511357503742 78 | 6,5AS,-,1511357506728 79 | 10,1AF,https://query.wikidata.org/#SELECT%20%3Fobra_de_teatro%20%3Fobra_de_teatroLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%0A%20%20%3Fobra_de_teatro%20wdt%3AP31%20wd%3AQ25379.%0A%7D%0ALIMIT%20100,1511357524987 80 | 
9,4BF,https://query.wikidata.org/#SELECT%20%3Flago%20%3FlagoLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Flago%20wdt%3AP31%20wd%3AQ23397.%0A%20%20%3Flago%20wdt%3AP17%20wd%3AQ1009.%0A%7D%0ALIMIT%20100,1511357528562 81 | 10,2AS,-,1511357529051 82 | 9,5BS,-,1511357530605 83 | 8,5AF,http://tinyurl.com/y9l7llk4,1511357551381 84 | 8,6AS,-,1511357553168 85 | 0,3AF,https://query.wikidata.org/#SELECT%20%3FO%20%3FOLabel%20WHERE%20%7B%0A%20%20%3FO%20wdt%3AP31%20wd%3AQ39715.%0A%20%20%3FO%20wdt%3AP17%20wd%3AQ20.%0A%7D,1511357553257 86 | 0,4AS,-,1511357554662 87 | 2,7AF,,1511357600094 88 | 2,8AS,-,1511357601473 89 | 1,7BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q21191270&properties=P179%23%23Q321423,1511357607314 90 | 1,8BS,-,1511357608537 91 | 8,6AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=wikidata&instance=,1511357622200 92 | 8,7AS,-,1511357623556 93 | 3,3BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q843152&properties=&properties=&properties=&properties=P1855%23%23Q3378290&properties=P2302%23%23Q21503250,1511357664458 94 | 3,4BS,-,1511357665939 95 | 8,7AF,http://tinyurl.com/ybx9dorj,1511357671704 96 | 10,2AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q23397&properties=P17%23%23Q1009,1511357672866 97 | 8,8AS,-,1511357672969 98 | 10,3AS,-,1511357675972 99 | 5,4BF,https://query.wikidata.org/#SELECT%20%3FPapas%20%3FPapasLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%0A%20%20%3FPapas%20wdt%3AP1889%20wd%3AQ2050567.%0A%7D%0ALIMIT%20100,1511357677650 100 | 5,5BS,-,1511357680406 101 | 2,8AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q95074&properties=P1080%23%23Q2050824,1511357688204 102 | 2,9AS,-,1511357689387 103 | 
0,4AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q5&properties=P39%23%23Q19546,1511357690254 104 | 0,5AS,-,1511357691462 105 | 1,8BF,https://query.wikidata.org/#SELECT%20%3Fpersonaje_de_ficci_n%20%3Fpersonaje_de_ficci_nLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fpersonaje_de_ficci_n%20wdt%3AP31%20wd%3AQ95074.%0A%20%20%3Fpersonaje_de_ficci_n%20wdt%3AP1080%20wd%3AQ931597.%0A%7D%0ALIMIT%20100,1511357692541 106 | 6,5AF,https://query.wikidata.org/#SELECT%20%3Fmujer%20%3FmujerLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fmujer%20wdt%3AP21%20wd%3AQ6581072.%0A%20%20%3Fmujer%20wdt%3AP19%20wd%3AQ25.%0A%7D%0ALIMIT%20100,1511357693401 107 | 1,9BS,-,1511357694224 108 | 6,6AS,-,1511357695373 109 | 8,8AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=marvel&instance=,1511357705913 110 | 8,9AS,-,1511357707115 111 | 10,3AF,https://query.wikidata.org/#SELECT%20%3Ffaro%20%3FfaroLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Ffaro%20wdt%3AP31%20wd%3AQ39715.%0A%20%20%3Ffaro%20wdt%3AP17%20wd%3AQ20.%0A%7D%0ALIMIT%20100,1511357725712 112 | 10,4AS,-,1511357727596 113 | 4,3AF,https://query.wikidata.org/#SELECT %3Fobra_de_teatro %3Fobra_de_teatroLabel WHERE {%0A SERVICE wikibase%3Alabel { bd%3AserviceParam wikibase%3Alanguage [AUTO_LANGUAGE]%2Cen. 
}%0A %3Fobra_de_teatro wdt%3AP31 wd%3AQ28517170.%0A %3Fobra_de_teatro wdt%3AP17 wd%3AQ20.%0A}%0ALIMIT 100,1511357737738 114 | 4,4AS,-,1511357742645 115 | 2,9AF,,1511357750995 116 | 2,10AS,-,1511357752156 117 | 9,5BF,,1511357778867 118 | 9,6BS,-,1511357780710 119 | 6,6AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q13442814&properties=P921%23%23Q2013,1511357801444 120 | 6,7AS,-,1511357803565 121 | 1,9BF,,1511357808277 122 | 1,10BS,-,1511357810022 123 | 2,10AF,,1511357815803 124 | 9,6BF,https://query.wikidata.org/#SELECT%20%3Fart_culo_cient_fico%20%3Fart_culo_cient_ficoLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%3Fart_culo_cient_fico%20wdt%3AP31%20wd%3AQ13442814.%0A%20%20%3Fart_culo_cient_fico%20wdt%3AP921%20wd%3AQ2013.%0A%7D%0ALIMIT%20100,1511357835483 125 | 9,7BS,-,1511357837326 126 | 3,4BF,https://query.wikidata.org/#SELECT *%0AWHERE {%0A %3Fx .%0A },1511357858470 127 | 3,5BS,-,1511357859516 128 | 9,7BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q21191270&properties=P179%23%23Q321423,1511357876477 129 | 9,8BS,-,1511357878470 130 | 0,5AF,https://query.wikidata.org/#SELECT%20%3Finstancia_de%20%3Finstancia_deLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Finstancia_de%20wdt%3AP1269%20wd%3AQ467.%0A%20%20%3Finstancia_de%20wdt%3AP19%20wd%3AQ25.%0A%7D%0ALIMIT%20100,1511357887214 131 | 0,6AS,-,1511357888537 132 | 10,4AF,,1511357917236 133 | 10,5AS,-,1511357918613 134 | 8,9AF,,1511357921978 135 | 8,10AS,-,1511357923301 136 | 0,6AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q13442814&properties=P921%23%23Q2013,1511357935845 137 | 0,7AS,-,1511357937348 138 | 
6,7AF,https://query.wikidata.org/#SELECT%20%3FLaw___Order%20%3FLaw___OrderLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3FLaw___Order%20wdt%3AP179%20wd%3AQ321423.%0A%20%20%0A%20%20%3FLaw___Order%20wdt%3AP31%20wd%3AQ21191270.%0A%7D%0ALIMIT%20100,1511357949083 139 | 6,8AS,-,1511357951325 140 | 9,8BF,https://query.wikidata.org/#SELECT%20%3Fpersonaje_de_ficci_n%20%3Fpersonaje_de_ficci_nLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%0A%20%20%0A%20%20%3Fpersonaje_de_ficci_n%20wdt%3AP31%20wd%3AQ95074.%0A%20%20%0A%20%20%3Fpersonaje_de_ficci_n%20wdt%3AP1080%20wd%3AQ931597.%0A%7D%0ALIMIT%20100,1511357956750 141 | 9,9BS,-,1511357958070 142 | 1,10BF,https://query.wikidata.org/#SELECT%20%3FCulicidae%20%3FCulicidaeLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3FCulicidae%20wdt%3AP171%20wd%3AQ7367.%0A%7D%0ALIMIT%20100,1511357968238 143 | 4,4AF,http://facet.dcc.uchile.cl/grafa/search?instance=Q5&properties=P106%23%23Q740369,1511357971252 144 | 4,5AS,-,1511357972461 145 | 3,5BF,,1511358017059 146 | 3,6BS,-,1511358018013 147 | 5,5BF,http://facet.dcc.uchile.cl/grafa/search?keyword=Mujeres+nacidas+en+Gales&instance=,1511358045749 148 | 5,6BS,-,1511358047252 149 | 0,7AF,https://query.wikidata.org/#SELECT%20%3Finstance_of%20%3Finstance_ofLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Finstance_of%20wdt%3AP31%20wd%3AQ21191270.%0A%20%20%3Finstance_of%20wdt%3AP179%20wd%3AQ321423.%0A%7D%0ALIMIT%20100,1511358098878 150 | 0,8AS,-,1511358099940 151 | 9,9BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q5&properties=P509%23%23Q170518,1511358112193 152 | 
9,10BS,-,1511358116565 153 | 5,6BF,https://query.wikidata.org/#SELECT%20%3Fart_culo_cient_fico%20%3Fart_culo_cient_ficoLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fart_culo_cient_fico%20wdt%3AP31%20wd%3AQ13442814.%0A%20%20%3Fart_culo_cient_fico%20wdt%3AP921%20wd%3AQ2013.%0A%7D%0ALIMIT%20100,1511358133376 154 | 5,7BS,-,1511358135171 155 | 11,1BS,-,1511358139535 156 | 3,6BF,https://query.wikidata.org/#SELECT %3Fart_culo_cient_fico %3Fart_culo_cient_ficoLabel WHERE {%0A SERVICE wikibase%3Alabel { bd%3AserviceParam wikibase%3Alanguage [AUTO_LANGUAGE]%2Cen. }%0A %3Fart_culo_cient_fico wdt%3AP31 wd%3AQ13442814.%0A %0A %3Fart_culo_cient_fico wdt%3AP921 wd%3AQ2013.%0A}%0ALIMIT 100,1511358152148 157 | 3,7BS,-,1511358153651 158 | 10,5AF,https://query.wikidata.org/#SELECT%20%3Fmujer%20%3FmujerLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%3Fmujer%20wdt%3AP180%20wd%3AQ467.%0A%20%20%3Fmujer%20wdt%3AP495%20wd%3AQ25.%0A%7D%0ALIMIT%20100,1511358174655 159 | 10,6AS,-,1511358176302 160 | 4,5AF,https://query.wikidata.org/#SELECT %3Fobra_de_teatro %3Fobra_de_teatroLabel WHERE {%0A SERVICE wikibase%3Alabel { bd%3AserviceParam wikibase%3Alanguage [AUTO_LANGUAGE]%2Cen. 
}%0A %3Fobra_de_teatro wdt%3AP31 wd%3AQ5.%0A %3Fobra_de_teatro wdt%3AP19 wd%3AQ25.%0A %3Fobra_de_teatro wdt%3AP21 wd%3AQ467.%0A}%0ALIMIT 100,1511358178680 161 | 4,6AS,-,1511358180607 162 | 11,1BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=&instance=Q25379,1511358188752 163 | 6,8AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q95074&properties=P1080%23%23Q931597&properties=,1511358194330 164 | 11,2BS,-,1511358194468 165 | 6,9AS,-,1511358197126 166 | 8,10AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=mosquito&instance=,1511358206014 167 | 9,10BF,https://query.wikidata.org/#SELECT%20%3FCulicidae%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%0A%20%20%3FCulicidae%20wdt%3AP171%20wd%3AQ7367.%0A%20%20%0A%7D%0ALIMIT%20100,1511358256928 168 | 4,6AF,http://facet.dcc.uchile.cl/grafa/search?instance=Q13442814&properties=P921%23%23Q2013,1511358256947 169 | 4,7AS,-,1511358258542 170 | 3,7BF,,1511358279251 171 | 3,8BS,-,1511358280236 172 | 5,7BF,http://facet.dcc.uchile.cl/grafa/search?keyword=Law+%26+Order&lang=es&instance=,1511358334834 173 | 5,8BS,-,1511358336624 174 | 10,6AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q13442814&properties=P921%23%23Q2013,1511358336752 175 | 10,7AS,-,1511358338347 176 | 11,2BF,https://query.wikidata.org/#SELECT%20%3FCameroon%20%3FCameroonLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3FCameroon%20wdt%3AP17%20wd%3AQ1009.%0A%20%20%3FCameroon%20wdt%3AP31%20wd%3AQ23397.%0A%7D%0ALIMIT%20100,1511358346198 177 | 11,3BS,-,1511358348488 178 | 3,8BF,https://query.wikidata.org/#SELECT %3Fpersonaje_de_ficci_n %3Fpersonaje_de_ficci_nLabel WHERE {%0A SERVICE wikibase%3Alabel { bd%3AserviceParam wikibase%3Alanguage [AUTO_LANGUAGE]%2Cen. 
}%0A %3Fpersonaje_de_ficci_n wdt%3AP31 wd%3AQ95074.%0A OPTIONAL { }%0A %3Fpersonaje_de_ficci_n wdt%3AP1080 wd%3AQ931597.%0A}%0ALIMIT 100,1511358352053 179 | 3,9BS,-,1511358353003 180 | 5,8BF,https://query.wikidata.org/#SELECT%20%3FUniverso_Marvel%20%3FUniverso_MarvelLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3FUniverso_Marvel%20wdt%3AP1080%20wd%3AQ931597.%0A%20%20%3FUniverso_Marvel%20wdt%3AP31%20wd%3AQ95074.%0A%7D%0ALIMIT%20100,1511358419609 181 | 5,9BS,-,1511358420909 182 | 6,9AF,https://query.wikidata.org/#SELECT%20%3Fpersona%20%3FpersonaLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%3Fpersona%20wdt%3AP509%20wd%3AQ170518.%0A%20%20%3Fpersona%20wdt%3AP360%20wd%3AQ5.%0A%7D%0ALIMIT%20100,1511358448468 183 | 12,1AS,-,1511358449473 184 | 6,10AS,-,1511358450919 185 | 3,9BF,,1511358450953 186 | 3,10BS,-,1511358451892 187 | 0,8AF,,1511358459164 188 | 0,9AS,-,1511358460116 189 | 3,10BF,https://query.wikidata.org/#SELECT %3FCulicidae %3FCulicidaeLabel WHERE {%0A SERVICE wikibase%3Alabel { bd%3AserviceParam wikibase%3Alanguage [AUTO_LANGUAGE]%2Cen. 
}%0A %3FCulicidae wdt%3AP171 wd%3AQ7367.%0A}%0ALIMIT 100,1511358476413 190 | 10,7AF,https://query.wikidata.org/#SELECT%20%3Fepisodio%20%3FepisodioLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%3Fepisodio%20wdt%3AP179%20wd%3AQ321423.%0A%20%20%3Fepisodio%20wdt%3AP360%20wd%3AQ1983062.%0A%7D%0ALIMIT%20100,1511358488872 191 | 10,8AS,-,1511358490624 192 | 4,7AF,,1511358503797 193 | 4,8AS,-,1511358507608 194 | 11,3BF,http://facet.dcc.uchile.cl/grafa/search?keyword=Faro&lang=es&instance=&properties=P17%23%23Q20,1511358508218 195 | 11,4BS,-,1511358509788 196 | 11,4BF,https://query.wikidata.org/#SELECT%20%3Fpope%20%3FpopeLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fpope%20wdt%3AP39%20wd%3AQ19546.%0A%7D%0ALIMIT%20100,1511358533364 197 | 11,5BS,-,1511358535734 198 | 4,8AF,personaje de ficci??n,1511358621426 199 | 12,1AF,http://tinyurl.com/ydh9f8kl,1511358622202 200 | 12,2AS,-,1511358623354 201 | 4,9AS,-,1511358647119 202 | 5,9BF,http://facet.dcc.uchile.cl/grafa/search?lang=es&keyword=Personas+que+murieron+quemadas+&instance=,1511358684324 203 | 5,10BS,-,1511358685943 204 | 0,9AF,https://query.wikidata.org/#SELECT%20%3Fhuman%20%3FhumanLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3Fhuman%20wdt%3AP31%20wd%3AQ5.%0A%20%20%3Fhuman%20wdt%3AP509%20wd%3AQ3196.%0A%7D%0ALIMIT%20100,1511358688467 205 | 0,10AS,-,1511358689808 206 | 11,5BF,,1511358772727 207 | 11,6BS,-,1511358774351 208 | 6,10AF,,1511358790800 209 | 10,8AF,,1511358831445 210 | 10,9AS,-,1511358833137 211 | 
11,6BF,https://query.wikidata.org/#SELECT%20%3Fpaper%20%3FpaperLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%3Fpaper%20wdt%3AP921%20wd%3AQ2013.%0A%20%20%3Fpaper%20wdt%3AP31%20wd%3AQ13442814.%0A%7D%0ALIMIT%20100,1511358842497 212 | 11,7BS,-,1511358843867 213 | 5,10BF,https://query.wikidata.org/#SELECT%20%3Fmosquito%20%3FmosquitoLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%0A%20%20%3Fmosquito%20wdt%3AP2959%20wd%3AQ4801985.%0A%20%20%3Fmosquito%20wdt%3AP910%20wd%3AQ8807846.%0A%7D%0ALIMIT%20100,1511358878959 214 | 11,7BF,http://facet.dcc.uchile.cl/grafa/search?keyword=law+%26+order&lang=en&instance=&properties=P31%23%23Q21191270&properties=P179%23%23Q321423,1511358903886 215 | 11,8BS,-,1511358905027 216 | 10,9AF,https://query.wikidata.org/#SELECT%20%3Fquemadura%20%3FquemaduraLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%0A%20%20%3Fquemadura%20wdt%3AP509%20wd%3AQ170518.%0A%20%20%0A%20%20%0A%20%20%3Fquemadura%20wdt%3AP31%20wd%3AQ5.%0A%7D%0ALIMIT%20100,1511358978149 217 | 10,10AS,-,1511358980547 218 | 11,8BF,https://query.wikidata.org/#SELECT%20%3FMarvel_Comics%20%3FMarvel_ComicsLabel%20%3Fpersonajes%20%3FpersonajesLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%3FMarvel_Comics%20wdt%3AP1080%20wd%3AQ931597.%0A%20%20OPTIONAL%20%7B%20%3FMarvel_Comics%20wdt%3AP674%20%3Fpersonajes.%20%7D%0A%7D%0ALIMIT%20100,1511358983463 219 | 11,9BS,-,1511358985371 220 | 4,9AF,,1511359000879 221 | 4,10AS,-,1511359002506 222 | 4,10AF,,1511359003439 223 | 
0,10AF,http://facet.dcc.uchile.cl/grafa/search?lang=es&instance=Q16521&properties=P171%23%23Q7367&properties=&properties=,1511359029709 224 | 10,10AF,,1511359214638 225 | 11,9BF,,1511359232807 226 | 11,10BS,-,1511359234445 227 | 11,10BF,https://query.wikidata.org/#SELECT%20%3FCulicidae%20%3FCulicidaeLabel%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%20%20%0A%20%20OPTIONAL%20%7B%20%20%7D%0A%20%20%0A%20%20%0A%20%20%0A%20%20%3FCulicidae%20wdt%3AP171%20wd%3AQ7367.%0A%7D%0ALIMIT%20100,1511359412542 228 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/CacheBuilder.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.index.*; 4 | import org.apache.lucene.store.FSDirectory; 5 | import org.apache.lucene.util.BytesRef; 6 | import org.eclipse.rdf4j.rio.RDFFormat; 7 | import org.eclipse.rdf4j.rio.RDFParser; 8 | import org.eclipse.rdf4j.rio.Rio; 9 | 10 | import java.io.*; 11 | import java.nio.file.Paths; 12 | import java.util.HashMap; 13 | import java.util.List; 14 | import java.util.Map; 15 | import java.util.stream.Collectors; 16 | import java.util.zip.GZIPInputStream; 17 | 18 | public class CacheBuilder { 19 | 20 | private static final int TICKS = 100000; 21 | private static final int M_PRIME = 50000; 22 | 23 | private static List makePOList(String directory) throws IOException { 24 | Map map = new HashMap<>(); 25 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(directory))); 26 | Fields fields = MultiFields.getFields(reader); 27 | Terms poTerms = fields.terms(DataFields.PO.name()); 28 | TermsEnum poIterator = poTerms.iterator(); 29 | BytesRef text; 30 | int read = 0; 31 | while((text = poIterator.next()) != null) { 32 | read++; 33 | if(read%TICKS == 0) 34 | System.out.println(read+" PO values 
processed..."); 35 | String poCode = text.utf8ToString(); 36 | String[] poSplit = poCode.split("##"); 37 | String value = poSplit[1]; 38 | if(!value.startsWith("Q")) continue; 39 | Term poTerm = new Term(DataFields.PO.name(), poCode); 40 | int frequency = reader.docFreq(poTerm); 41 | if(frequency > M_PRIME) 42 | map.put(poCode, frequency); 43 | } 44 | 45 | System.out.println(read+" PO values processed in total."); 46 | return map.entrySet().stream() 47 | .sorted((e1, e2) -> e2.getValue().compareTo(e1.getValue())) 48 | .map(Map.Entry::getKey) 49 | .collect(Collectors.toList()); 50 | } 51 | 52 | public static void main(String[] args) throws IOException { 53 | System.out.println("CacheBuilder"); 54 | System.out.println("Makes a list of all the instances that need a cache"); 55 | System.out.println(); 56 | 57 | if(args.length!=3) { 58 | System.out.println("USAGE: NT_file Index_Directory Output_List"); 59 | System.exit(0); 60 | } 61 | 62 | String ntFilename = args[0]; 63 | String indexDir = args[1]; 64 | String outputFile = args[2]; 65 | 66 | long startTime = System.currentTimeMillis(); 67 | List poList = makePOList(indexDir); 68 | System.err.println("PO List created!"); 69 | System.err.println("INFO: Length of PO List = " + poList.size()); 70 | 71 | InputStream in = new FileInputStream(ntFilename); 72 | if(ntFilename.endsWith(".gz")){ 73 | System.err.println("Input file is gzipped."); 74 | in = new GZIPInputStream(in); 75 | } 76 | Reader reader = new InputStreamReader(in, "UTF-8"); 77 | 78 | RDFParser parser = Rio.createParser(RDFFormat.NTRIPLES); 79 | CacheHandler handler = new CacheHandler(poList); 80 | parser.setRDFHandler(handler); 81 | 82 | System.err.println("Reading NT file..."); 83 | System.err.println("This may take a while..."); 84 | try { 85 | parser.parse(reader, ""); 86 | } catch (Exception e) { 87 | e.printStackTrace(); 88 | throw new IOException(); 89 | } finally { 90 | in.close(); 91 | } 92 | List needsCachingList = handler.getResults(); 93 | long 
totalTime = ((System.currentTimeMillis() - startTime) / 1000 / 60) + 1; 94 | System.err.println("List created!"); 95 | 96 | System.err.println("Writing results to output file"); 97 | PrintWriter pw = new PrintWriter(new FileWriter(outputFile)); 98 | needsCachingList.forEach(pw::println); 99 | pw.close(); 100 | System.err.println("Complete!"); 101 | System.err.println("Total time = " + totalTime + " min"); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/CacheHandler.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.eclipse.rdf4j.model.Resource; 4 | import org.eclipse.rdf4j.model.Statement; 5 | import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler; 6 | 7 | import java.io.FileInputStream; 8 | import java.io.IOException; 9 | import java.io.InputStream; 10 | import java.util.*; 11 | import java.util.stream.Collectors; 12 | 13 | class CacheHandler extends AbstractRDFHandler { 14 | 15 | private static final int M_PRIME = 50000; 16 | private static final int TICKS = 100000; 17 | 18 | private TreeSet poSet; 19 | private Map map; 20 | 21 | private Resource last; 22 | private List subjectPoList; 23 | private Properties properties; 24 | private int read; 25 | 26 | CacheHandler(List poList) throws IOException { 27 | // use for log(n) contains 28 | poSet = new TreeSet(); 29 | poSet.addAll(poList); 30 | 31 | map = new HashMap<>(); 32 | last = null; 33 | read = 0; 34 | properties = new Properties(); 35 | InputStream input = new FileInputStream("facet.properties"); 36 | properties.load(input); 37 | } 38 | 39 | private List getAllCombinations(List list) { 40 | Set types = new TreeSet<>(); 41 | List results = new ArrayList<>(); 42 | String instanceOfCode = properties.getProperty("instanceOf"); 43 | for(String e : list) { 44 | if(e.startsWith(instanceOfCode)) { 45 | results.add(e); 46 | types.add(e); 47 | } 
48 | } 49 | if(types.isEmpty()) { 50 | return results; 51 | } 52 | for(String element : list) { 53 | if(!types.contains(element)){ 54 | int resultsLength = results.size(); 55 | for(int j = 0; j < resultsLength; j++) { 56 | results.add(results.get(j) + "||" + element); 57 | } 58 | } 59 | } 60 | return results; 61 | } 62 | 63 | @Override 64 | public void handleStatement(Statement s) { 65 | read++; 66 | if(read%TICKS == 0) 67 | System.err.println(read + "lines read..."); 68 | 69 | final String entityIRI = properties.getProperty("entityIRI"); 70 | final String propertyIRI = properties.getProperty("propertyIRI"); 71 | 72 | Resource subject = s.getSubject(); 73 | // FIRST LINE 74 | if(last == null) { 75 | last = subject; 76 | subjectPoList = new ArrayList<>(); 77 | } 78 | // NEW SUBJECT 79 | if(!last.toString().equals(subject.toString())) { 80 | updateEntity(); 81 | // Start a new list for the new subject 82 | last = subject; 83 | subjectPoList = new ArrayList<>(); 84 | } 85 | // PROPERTIES 86 | String predicate = s.getPredicate().toString(); 87 | if(predicate.startsWith(propertyIRI)) { 88 | String p = predicate.substring(propertyIRI.length()); 89 | String object = s.getObject().toString(); 90 | String q = object.substring(entityIRI.length()); 91 | String value = p + "##" + q; 92 | if(poSet.contains(value)) { 93 | subjectPoList.add(value); 94 | } 95 | } 96 | } 97 | 98 | void updateEntity(){ 99 | Collections.sort(subjectPoList); 100 | List combinations = getAllCombinations(subjectPoList); 101 | // Add to the map all possible combinations 102 | for(String combination : combinations) { 103 | Integer count = map.get(combination); 104 | if(count==null) count = 0; 105 | count = count + 1; 106 | map.put(combination, 1); 107 | } 108 | } 109 | 110 | @Override 111 | public void endRDF(){ 112 | updateEntity(); 113 | } 114 | 115 | List getResults() { 116 | // Return results 117 | return map.entrySet().stream() 118 | .filter(e -> e.getValue() > M_PRIME) 119 | .map(Map.Entry::getKey) 
120 | .collect(Collectors.toList()); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/DataFields.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | public enum DataFields { 4 | SUBJECT, LABEL, DESCRIPTION, ALT_LABEL, IMAGE, PO, PROPERTY, TYPE, RANK, RANK_STORED 5 | } 6 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/IndexData.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import java.io.*; 4 | import java.util.zip.GZIPInputStream; 5 | 6 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 7 | import org.eclipse.rdf4j.rio.Rio; 8 | import org.eclipse.rdf4j.rio.RDFParser; 9 | import org.eclipse.rdf4j.rio.RDFFormat; 10 | 11 | import org.apache.lucene.analysis.Analyzer; 12 | import org.apache.lucene.index.IndexWriter; 13 | 14 | public class IndexData extends Indexer { 15 | 16 | public static void main(String[] args) throws IOException { 17 | System.out.println("IndexData"); 18 | System.out.println("Reads a WikiData dump (NT format) and creates a index with the most relevant data"); 19 | System.out.println(); 20 | 21 | if(args.length!=2) { 22 | System.out.println("USAGE: Input_NT_file Output_Directory"); 23 | System.exit(0); 24 | } 25 | 26 | long startTime = System.currentTimeMillis(); 27 | 28 | String filename = args[0]; 29 | String outputDir = args[1]; 30 | 31 | Analyzer analyzer = new StandardAnalyzer(); 32 | IndexWriter writer = makeWriter(outputDir, analyzer); 33 | 34 | InputStream in = new FileInputStream(filename); 35 | if(filename.endsWith(".gz")){ 36 | System.err.println("Input file is gzipped."); 37 | in = new GZIPInputStream(in); 38 | } 39 | Reader reader = new InputStreamReader(in, "UTF-8"); 40 | 41 | RDFParser parser = 
Rio.createParser(RDFFormat.NTRIPLES); 42 | IndexHandler handler = new IndexHandler(writer); 43 | parser.setRDFHandler(handler); 44 | 45 | System.err.println("Parsing file..."); 46 | System.err.println("This may take a while..."); 47 | try { 48 | parser.parse(reader, ""); 49 | } catch (Exception e) { 50 | throw new IOException(); 51 | } finally { 52 | in.close(); 53 | } 54 | handler.finish(); 55 | 56 | long totalTime = System.currentTimeMillis() - startTime; 57 | System.err.println("Total time: " + totalTime + " ms"); 58 | System.err.println("Parsing completed!"); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/IndexHandler.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.document.*; 4 | import org.apache.lucene.index.IndexWriter; 5 | import org.eclipse.rdf4j.model.Literal; 6 | import org.eclipse.rdf4j.model.Resource; 7 | import org.eclipse.rdf4j.model.Statement; 8 | import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler; 9 | 10 | import java.io.FileInputStream; 11 | import java.io.IOException; 12 | import java.io.InputStream; 13 | import java.util.ArrayList; 14 | import java.util.Arrays; 15 | import java.util.List; 16 | import java.util.Properties; 17 | 18 | class IndexHandler extends AbstractRDFHandler { 19 | 20 | private IndexWriter writer; 21 | private Resource last; 22 | private List ps; 23 | private Document d; 24 | private int read; 25 | private Properties properties; 26 | 27 | IndexHandler(IndexWriter iw) throws IOException { 28 | super(); 29 | writer = iw; 30 | last = null; 31 | d = null; 32 | read = 0; 33 | properties = new Properties(); 34 | InputStream input = new FileInputStream("facet.properties"); 35 | properties.load(input); 36 | } 37 | 38 | @Override 39 | public void handleStatement(Statement s) { 40 | final int TICKS = 100000; 41 | 42 | final String entityIRI 
= properties.getProperty("entityIRI"); 43 | final String propertyIRI = properties.getProperty("propertyIRI"); 44 | final String labelIRI = properties.getProperty("labelIRI"); 45 | final String descriptionIRI = properties.getProperty("descriptionIRI"); 46 | final String alt_labelIRI = properties.getProperty("alt_labelIRI"); 47 | 48 | final String instanceOf = properties.getProperty("instanceOf"); 49 | final String image = properties.getProperty("image"); 50 | final String entityPrefix = properties.getProperty("entityPrefix"); 51 | 52 | read++; 53 | if(read%TICKS==0) 54 | System.err.println(read+" lines read..."); 55 | 56 | Resource subject = s.getSubject(); 57 | // FIRST LINE 58 | if(last == null) { 59 | last = subject; 60 | String name = last.toString(); 61 | name = name.replace(entityIRI, ""); 62 | d = new Document(); 63 | ps = new ArrayList<>(); 64 | Field subj = new StringField(DataFields.SUBJECT.name(), name, Field.Store.YES); 65 | d.add(subj); 66 | } 67 | // NEW SUBJECT 68 | if(!last.toString().equals(subject.toString())) { 69 | last = subject; 70 | try { 71 | writer.addDocument(d); 72 | } catch (IOException e) { 73 | System.err.println("Error writing Lucene document."); 74 | } 75 | String name = last.toString(); 76 | name = name.replace(entityIRI, ""); 77 | d = new Document(); 78 | ps = new ArrayList<>(); 79 | Field subj = new StringField(DataFields.SUBJECT.name(), name, Field.Store.YES); 80 | d.add(subj); 81 | } 82 | // PROPERTIES 83 | String predicate = s.getPredicate().toString(); 84 | if(predicate.startsWith(propertyIRI)) { 85 | String p = predicate.replace(propertyIRI, ""); 86 | if(!ps.contains(p)) { 87 | ps.add(p); 88 | Field propertyField = new StringField(DataFields.PROPERTY.name(), p, Field.Store.YES); 89 | d.add(propertyField); 90 | } 91 | String object = s.getObject().toString(); 92 | String value = object.replace(entityIRI, ""); 93 | if(p.equals(instanceOf)) { 94 | Field typeField = new StringField(DataFields.TYPE.name(), value, Field.Store.YES); 
95 | d.add(typeField); 96 | } 97 | if(p.equals(image)) { 98 | Field imgField = new StringField(DataFields.IMAGE.name(), value, Field.Store.YES); 99 | d.add(imgField); 100 | } 101 | if(value.startsWith(entityPrefix)) { 102 | String po = p + "##" + value; 103 | Field poField = new StringField(DataFields.PO.name(), po, Field.Store.YES); 104 | d.add(poField); 105 | } 106 | } else { 107 | // LITERAL VALUES 108 | if(!(s.getObject() instanceof Literal)) return; 109 | Literal value = (Literal) s.getObject(); 110 | String language = value.getLanguage().orElse("??"); 111 | // CURRENT LANGUAGES 112 | List languages = Arrays.asList(properties.getProperty("languages").split(",")); 113 | if(!languages.contains(language)) return; 114 | String object = value.getLabel(); 115 | if(predicate.equals(labelIRI)) { 116 | Field label = new TextField(DataFields.LABEL.name()+"-"+language, object, Field.Store.YES); 117 | d.add(label); 118 | } else if(predicate.equals(descriptionIRI)) { 119 | Field description = new TextField(DataFields.DESCRIPTION.name()+"-"+language, object, Field.Store.YES); 120 | d.add(description); 121 | } else if(predicate.equals(alt_labelIRI)) { 122 | Field altLabel = new TextField(DataFields.ALT_LABEL.name()+"-"+language, object, Field.Store.YES); 123 | d.add(altLabel); 124 | } 125 | } 126 | } 127 | 128 | void finish() { 129 | try { 130 | if(d != null) { 131 | System.err.println(read + " lines read in total."); 132 | writer.addDocument(d); 133 | } 134 | writer.close(); 135 | System.out.println("Complete!"); 136 | } catch(IOException e) { 137 | System.err.println("Error. 
Cannot close Lucene writer"); 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/IndexInstances.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.analysis.Analyzer; 4 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 5 | import org.apache.lucene.document.*; 6 | import org.apache.lucene.index.*; 7 | import org.apache.lucene.search.*; 8 | import org.apache.lucene.store.FSDirectory; 9 | import org.apache.lucene.util.BytesRef; 10 | 11 | import java.io.FileInputStream; 12 | import java.io.IOException; 13 | import java.io.InputStream; 14 | import java.nio.file.Files; 15 | import java.nio.file.Paths; 16 | import java.util.*; 17 | import java.util.stream.Collectors; 18 | import java.util.stream.Stream; 19 | 20 | public class IndexInstances extends Indexer { 21 | 22 | private static final int DOCS_LIMIT = 50000; 23 | 24 | private static IndexSearcher dataSearcher; 25 | private static Properties configFile; 26 | 27 | private static Map getAllProperties(TopDocs results) throws IOException { 28 | Map propertiesMap = new HashMap<>(); 29 | String instanceOf = configFile.getProperty("instanceOf"); 30 | String entityPrefix = configFile.getProperty("entityPrefix"); 31 | ScoreDoc[] hits = results.scoreDocs; 32 | for(ScoreDoc hit : hits) { 33 | Document doc = dataSearcher.doc(hit.doc); 34 | IndexableField[] pos = doc.getFields(DataFields.PO.name()); 35 | for(IndexableField po : pos) { 36 | String raw = po.stringValue(); 37 | String[] split = raw.split("##"); 38 | if(!split[1].startsWith(entityPrefix)) continue; 39 | String key = split[0]; 40 | if(key.equals(instanceOf)) continue; 41 | if(propertiesMap.containsKey(key)) { 42 | propertiesMap.replace(key, propertiesMap.get(key)+1); 43 | } else { 44 | propertiesMap.put(key, 1); 45 | } 46 | } 47 | } 48 | return propertiesMap; 49 | 
} 50 | 51 | private static void addPropertiesToDoc(Map properties, Document d) { 52 | for(Map.Entry entry : properties.entrySet()) { 53 | String code = entry.getKey(); 54 | Integer frequency = entry.getValue(); 55 | String property = code + "##" + frequency.toString(); 56 | Field propertyField = new StringField(InstancesFields.PROPERTY.name(), property, Field.Store.YES); 57 | d.add(propertyField); 58 | } 59 | } 60 | 61 | public static void main(String[] args) throws IOException { 62 | System.out.println("IndexProperties"); 63 | System.out.println("Creates a new index using the previous one"); 64 | System.out.println("The new index contains all instances with related properties"); 65 | System.out.println(); 66 | if (args.length != 3) { 67 | System.out.println("USAGE: DataIndex CachingFile OutputIndex"); 68 | System.exit(0); 69 | } 70 | 71 | // INIT: Make reader and searcher 72 | System.out.println("Init..."); 73 | final int TICKS = 100; 74 | 75 | configFile = new Properties(); 76 | InputStream input = new FileInputStream("facet.properties"); 77 | configFile.load(input); 78 | 79 | long startTime = System.currentTimeMillis(); 80 | 81 | String dataDirectory = args[0]; 82 | IndexReader dataReader = DirectoryReader.open(FSDirectory.open(Paths.get(dataDirectory))); 83 | dataSearcher = new IndexSearcher(dataReader); 84 | 85 | // FIRST PART: Create index for all instances 86 | System.out.println("Creating the index..."); 87 | Fields fields = MultiFields.getFields(dataReader); 88 | Terms terms = fields.terms(DataFields.TYPE.name()); 89 | TermsEnum termsEnum = terms.iterator(); 90 | BytesRef text; 91 | int read = 0; 92 | 93 | String outputDirectory = args[2]; 94 | Analyzer analyzer = new StandardAnalyzer(); 95 | IndexWriter writer = makeWriter(outputDirectory, analyzer); 96 | 97 | while((text = termsEnum.next()) != null) { 98 | read++; 99 | if (read % TICKS == 0) 100 | System.out.println(read + " instances processed..."); 101 | // Get the label of the instance 102 | String q 
= text.utf8ToString(); 103 | 104 | String boostString = getField(dataSearcher, q, DataFields.RANK_STORED.name()); 105 | double boost = boostString == null ? 0d : Double.parseDouble(boostString); 106 | Term term = new Term(DataFields.TYPE.name(), q); 107 | long occurrences = dataReader.docFreq(term); 108 | if (occurrences < 1) System.err.println("WARN: No occurrences for " + q); 109 | // Create document with Q and Label 110 | Document d = new Document(); 111 | Field qField = new StringField(InstancesFields.ID.name(), q, Field.Store.YES); 112 | Field occurrencesField = new NumericDocValuesField(InstancesFields.FREQUENCY.name(), occurrences); 113 | Field number = new StoredField(InstancesFields.FREQ_STORED.name(), occurrences); 114 | Field boostField = new DoubleDocValuesField(InstancesFields.RANK.name(), boost); 115 | d.add(qField); 116 | d.add(occurrencesField); 117 | d.add(number); 118 | d.add(boostField); 119 | 120 | // LABELS 121 | String[] languages = configFile.getProperty("languages").split(","); 122 | for(String lang : languages) { 123 | String[] labels = getFieldAll(dataSearcher, q, DataFields.LABEL.name()+"-"+lang); 124 | String[] altLabels = getFieldAll(dataSearcher, q, DataFields.ALT_LABEL.name()+"-"+lang); 125 | 126 | if(labels != null) { 127 | for (String label : labels) { 128 | Field labelField = new TextField(InstancesFields.LABEL.name() + "-" + lang, 129 | label, Field.Store.YES); 130 | d.add(labelField); 131 | } 132 | } 133 | if(altLabels != null) { 134 | for (String altLabel : altLabels) { 135 | Field altLabelField = new TextField(InstancesFields.ALT_LABEL.name() + "-" + lang, 136 | altLabel, Field.Store.YES); 137 | d.add(altLabelField); 138 | } 139 | } 140 | } 141 | 142 | // Get all possible properties for this particular instance 143 | if(occurrences > DOCS_LIMIT){ 144 | // Map with property and frequency 145 | Query query = new TermQuery(new Term(DataFields.TYPE.name(), q)); 146 | TopDocs results = dataSearcher.search(query, 
dataReader.maxDoc()); 147 | Map propertiesMap = getAllProperties(results); 148 | // Add the values to the document 149 | addPropertiesToDoc(propertiesMap, d); 150 | 151 | } 152 | writer.addDocument(d); 153 | } 154 | System.out.println(read + " instances processed in total"); 155 | 156 | System.out.println("Creating caching entries..."); 157 | // SECOND PART: Do the caching for all entries in file 158 | List needsCachingList; 159 | String needCachingFile = args[1]; 160 | // Read the file 161 | try(Stream stream = Files.lines(Paths.get(needCachingFile))) { 162 | needsCachingList = stream 163 | .filter(l -> l.split("\\|\\|").length > 1) 164 | .collect(Collectors.toList()); 165 | } 166 | // Do the cache 167 | read = 0; 168 | int totalEntries = needsCachingList.size(); 169 | for(String entry : needsCachingList) { 170 | read++; 171 | if(read%10 == 0) { 172 | System.out.println(read + " / " + totalEntries + " entries processed in total"); 173 | } 174 | String[] queryParts = entry.split("\\|\\|"); 175 | String type = queryParts[0].split("##")[1]; 176 | String id = type; 177 | BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); 178 | queryBuilder.add(new TermQuery(new Term(DataFields.TYPE.name(), type)), BooleanClause.Occur.MUST); 179 | for(int i = 1; i < queryParts.length; i++) { 180 | String po = queryParts[i]; 181 | id = id + "||" + po; 182 | queryBuilder.add(new TermQuery(new Term(DataFields.PO.name(), po)), BooleanClause.Occur.MUST); 183 | } 184 | System.err.println("Creating cache for " + id); 185 | Query query = queryBuilder.build(); 186 | Document doc = new Document(); 187 | Field idField = new StringField(InstancesFields.ID.name(), id, Field.Store.YES); 188 | doc.add(idField); 189 | TopDocs results = dataSearcher.search(query, dataReader.numDocs()); 190 | Map propertiesMap = getAllProperties(results); 191 | addPropertiesToDoc(propertiesMap, doc); 192 | writer.addDocument(doc); 193 | } 194 | 195 | System.out.println("Caching complete"); 196 | 
writer.close(); 197 | long totalTime = System.currentTimeMillis() - startTime; 198 | System.err.println("Total time: " + totalTime + " ms"); 199 | System.out.println("Finish!"); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/IndexProperties.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.analysis.Analyzer; 4 | import org.apache.lucene.analysis.en.EnglishAnalyzer; 5 | import org.apache.lucene.document.*; 6 | import org.apache.lucene.index.*; 7 | import org.apache.lucene.search.*; 8 | import org.apache.lucene.store.FSDirectory; 9 | import org.apache.lucene.util.BytesRef; 10 | 11 | import java.io.FileInputStream; 12 | import java.io.IOException; 13 | import java.io.InputStream; 14 | import java.nio.file.Paths; 15 | import java.util.*; 16 | import java.util.stream.*; 17 | 18 | public class IndexProperties extends Indexer { 19 | 20 | private static class Property { 21 | String p; 22 | String o; 23 | long n; 24 | 25 | private Property(String p, String o, long n) { 26 | this.p = p; this.o = o; this.n = n; 27 | } 28 | } 29 | 30 | public static void main(String[] args) throws IOException { 31 | System.out.println("IndexProperties"); 32 | System.out.println("Creates a new index using the previous one"); 33 | System.out.println("The new index contains all properties"); 34 | System.out.println(); 35 | 36 | if(args.length != 2) { 37 | System.out.println("USAGE: Lucene_Index_Folder Index_Out_Dir"); 38 | System.exit(0); 39 | } 40 | 41 | // INIT: Make reader and searcher 42 | System.out.println("Init..."); 43 | final int TICKS = 1000; 44 | String in = args[0]; 45 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(in))); 46 | Fields fields = MultiFields.getFields(reader); 47 | IndexSearcher searcher = new IndexSearcher(reader); 48 | Analyzer analyzer = new 
EnglishAnalyzer(); 49 | BytesRef text; 50 | int read = 0; 51 | 52 | // READ Properties file 53 | Properties properties = new Properties(); 54 | InputStream input = new FileInputStream("facet.properties"); 55 | properties.load(input); 56 | 57 | // FIRST PART: Compute m' 58 | System.out.println("Count frequency for all PO values..."); 59 | long startTime = System.currentTimeMillis(); 60 | List poList = new ArrayList<>(); 61 | Terms poTerms = fields.terms(DataFields.PO.name()); 62 | TermsEnum poIterator = poTerms.iterator(); 63 | while((text = poIterator.next()) != null) { 64 | read++; 65 | if(read%TICKS == 0) 66 | System.out.println(read+" PO values processed..."); 67 | String poCode = text.utf8ToString(); 68 | String[] poSplit = poCode.split("##"); 69 | String value = poSplit[1]; 70 | if(!value.startsWith("Q")) continue; 71 | Term poTerm = new Term(DataFields.PO.name(), poCode); 72 | poList.add(new Property(poSplit[0], value, reader.docFreq(poTerm))); 73 | } 74 | 75 | System.out.println("Grouping by property..."); 76 | Map> poMap = poList.stream().collect(Collectors.groupingBy(p -> p.p)); 77 | 78 | Map mValues = new HashMap<>(); 79 | Map valuesCardinality = new HashMap<>(); 80 | 81 | System.out.println("Obtaining max value..."); 82 | read = 0; 83 | for(Map.Entry> entry : poMap.entrySet()) { 84 | read++; 85 | if(read%TICKS == 0) 86 | System.out.println(read+" properties processed..."); 87 | valuesCardinality.put(entry.getKey(), entry.getValue().size()); 88 | long maxFrequency = 0L; 89 | Property maxProperty = null; 90 | List poSublist = entry.getValue(); 91 | for(Property poEntry : poSublist) { 92 | if(poEntry.n > maxFrequency) { 93 | maxFrequency = poEntry.n; 94 | maxProperty = poEntry; 95 | } 96 | } 97 | mValues.put(entry.getKey(), maxProperty); 98 | } 99 | long endTime = System.currentTimeMillis(); 100 | long totalTime = (endTime - startTime) / 1000 ; 101 | System.out.println("Computing m' - Total time: " + totalTime + " s"); 102 | 103 | // SECOND PART: Create 
index for all properties 104 | Terms terms = fields.terms(DataFields.PROPERTY.name()); 105 | TermsEnum termsEnum = terms.iterator(); 106 | String propDir = args[1]; 107 | IndexWriter writer = makeWriter(propDir, analyzer); 108 | read = 0; 109 | 110 | System.out.println("Creating the index"); 111 | while((text = termsEnum.next()) != null) { 112 | read++; 113 | if(read%TICKS == 0) 114 | System.out.println(read+" properties processed..."); 115 | // Get the label of the property 116 | String p = text.utf8ToString(); 117 | String label = getField(searcher, p, DataFields.LABEL.name()); 118 | Term term = new Term(DataFields.PROPERTY.name(), p); 119 | long occurrences = reader.docFreq(term); 120 | if(label == null) continue; 121 | // Create document with P and Label 122 | Document d = new Document(); 123 | Field pField = new StringField(PropertiesFields.P.name(), p, Field.Store.YES); 124 | Field labelField = new TextField(PropertiesFields.LABEL.name(), label, Field.Store.YES); 125 | Field occurrencesField = new NumericDocValuesField(PropertiesFields.OCCURRENCES.name(), occurrences); 126 | Field number = new StoredField(PropertiesFields.NUMBER.name(), occurrences); 127 | 128 | Property thisProperty = mValues.get(p); 129 | if (thisProperty != null) { 130 | Field mPrime = new StoredField(PropertiesFields.M_PRIME.name(), thisProperty.n); 131 | Field topValue = new StringField(PropertiesFields.TOP_VALUE.name(), thisProperty.o, Field.Store.YES); 132 | d.add(mPrime); 133 | d.add(topValue); 134 | } 135 | 136 | Integer cardinality = valuesCardinality.get(p); 137 | int valuesCard = cardinality == null ? 
0 : cardinality; 138 | Field cardField = new StoredField(PropertiesFields.VALUES_CARD.name(), valuesCard); 139 | d.add(cardField); 140 | d.add(pField); 141 | d.add(labelField); 142 | d.add(occurrencesField); 143 | d.add(number); 144 | // Cache the values except instanceOf 145 | String instanceOfCode = properties.getProperty("instanceOf"); 146 | if(!p.equals(instanceOfCode)) { 147 | Map valuesMap = new HashMap<>(); 148 | Query query = new TermQuery(new Term(DataFields.PROPERTY.name(), p)); 149 | TopDocs results = searcher.search(query, reader.maxDoc()); 150 | ScoreDoc[] hits = results.scoreDocs; 151 | for(ScoreDoc hit : hits) { 152 | Document doc = searcher.doc(hit.doc); 153 | IndexableField[] pos = doc.getFields(DataFields.PO.name()); 154 | for(IndexableField po : pos) { 155 | String raw = po.stringValue(); 156 | if(!raw.startsWith(p)) continue; 157 | String[] rawSplit = raw.split("##"); 158 | String value = rawSplit[1]; 159 | if(!value.startsWith("Q")) continue; 160 | if(valuesMap.containsKey(value)) { 161 | valuesMap.replace(value, valuesMap.get(value)+1); 162 | } else { 163 | valuesMap.put(value, 1); 164 | } 165 | } 166 | } 167 | // Add the values to the document 168 | List> valuesList = new ArrayList<>(valuesMap.entrySet()); 169 | Collections.sort(valuesList, 170 | (Map.Entry o1, Map.Entry o2) -> o2.getValue().compareTo(o1.getValue())); 171 | for(Map.Entry entry : valuesList) { 172 | if(entry.getValue() < 5) break; 173 | Field valueField = new StringField(PropertiesFields.VALUES.name(), entry.getKey(), Field.Store.YES); 174 | d.add(valueField); 175 | } 176 | } 177 | writer.addDocument(d); 178 | } 179 | System.out.println(read+" properties processed in total"); 180 | writer.close(); 181 | System.out.println("Complete!"); 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/IndexValues.java: -------------------------------------------------------------------------------- 1 | package 
cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 4 | import org.apache.lucene.document.Document; 5 | import org.apache.lucene.document.Field; 6 | import org.apache.lucene.document.StoredField; 7 | import org.apache.lucene.document.StringField; 8 | import org.apache.lucene.index.*; 9 | import org.apache.lucene.search.*; 10 | import org.apache.lucene.store.FSDirectory; 11 | 12 | import javax.json.*; 13 | import java.io.FileInputStream; 14 | import java.io.IOException; 15 | import java.io.InputStream; 16 | import java.nio.file.Paths; 17 | import java.util.ArrayList; 18 | import java.util.List; 19 | import java.util.Properties; 20 | 21 | public class IndexValues extends Indexer { 22 | 23 | public static void main(String[] args) throws IOException { 24 | System.out.println("IndexValues"); 25 | System.out.println("Creates a new index using two previous indexes"); 26 | System.out.println("The new index contains all values for cached instances"); 27 | System.out.println(); 28 | if (args.length != 3) { 29 | System.out.println("USAGE: DataIndex InstancesIndex OutputIndex"); 30 | System.exit(0); 31 | } 32 | 33 | // INIT: Make reader, searcher and writer 34 | System.out.println("Init..."); 35 | final int TICKS = 100; 36 | 37 | String dataDirectory = args[0]; 38 | IndexReader dataReader = DirectoryReader.open(FSDirectory.open(Paths.get(dataDirectory))); 39 | IndexSearcher dataSearcher = new IndexSearcher(dataReader); 40 | 41 | String propertyDirectory = args[1]; 42 | IndexReader instancesReader = DirectoryReader.open(FSDirectory.open(Paths.get(propertyDirectory))); 43 | 44 | IndexWriter writer = makeWriter(args[2], new StandardAnalyzer()); 45 | 46 | Properties configFile = new Properties(); 47 | InputStream input = new FileInputStream("facet.properties"); 48 | configFile.load(input); 49 | String[] languages = configFile.getProperty("languages").split(","); 50 | String entityPrefix = configFile.getProperty("entityPrefix"); 51 | 52 | 
long start = System.currentTimeMillis(); 53 | for(int i=0; i queries = new ArrayList<>(); 72 | queries.add(new TermQuery(new Term(DataFields.TYPE.name(), queryParts[0]))); 73 | 74 | if(queryParts.length > 1) { 75 | for(int j=1; j possibleValues = new ArrayList<>(); 99 | for(ScoreDoc hit : hits) { 100 | Document searchDoc = dataSearcher.doc(hit.doc); 101 | IndexableField[] pos = searchDoc.getFields(DataFields.PO.name()); 102 | for(IndexableField po : pos) { 103 | String[] rawPO = po.stringValue().split("##"); 104 | if(!property.equals(rawPO[0])) continue; 105 | String value = rawPO[1]; 106 | if(!value.startsWith(entityPrefix)) continue; 107 | if(!possibleValues.contains(value)) possibleValues.add(value); 108 | } 109 | } 110 | 111 | JsonBuilderFactory factory = Json.createBuilderFactory(null); 112 | JsonArrayBuilder main = factory.createArrayBuilder(); 113 | for(String value : possibleValues) { 114 | JsonObjectBuilder valueObject = factory.createObjectBuilder(); 115 | valueObject.add("id", property + "##" + value); 116 | String pr = getField(dataSearcher, value, DataFields.RANK_STORED.name()); 117 | if(pr == null) continue; 118 | valueObject.add("rank", Double.parseDouble(pr)); 119 | for(String language : languages) { 120 | String label = getField(dataSearcher, value, DataFields.LABEL.name()+"-"+language); 121 | if (label == null) label = value; 122 | valueObject.add("name-"+language, label); 123 | } 124 | main.add(valueObject); 125 | } 126 | String jsonArray = main.build().toString(); 127 | Field dataField = new StoredField(ValuesFields.VALUES.name(), jsonArray); 128 | valuesDocument.add(dataField); 129 | writer.addDocument(valuesDocument); 130 | } 131 | 132 | System.err.println(instanceBase + " finished!"); 133 | } 134 | long totalTime = System.currentTimeMillis() - start; 135 | long minutes = totalTime / 1000 / 60; 136 | long hours = minutes / 60; 137 | minutes = minutes % 60; 138 | System.err.println("Total time: " + hours + " h " + minutes + " min"); 139 | 
writer.close(); 140 | System.err.println("Complete!"); 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/Indexer.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.analysis.Analyzer; 4 | import org.apache.lucene.document.Document; 5 | import org.apache.lucene.index.IndexWriter; 6 | import org.apache.lucene.index.IndexWriterConfig; 7 | import org.apache.lucene.index.Term; 8 | import org.apache.lucene.search.*; 9 | import org.apache.lucene.store.Directory; 10 | import org.apache.lucene.store.FSDirectory; 11 | 12 | import java.io.File; 13 | import java.io.IOException; 14 | import java.nio.file.Path; 15 | import java.nio.file.Paths; 16 | 17 | abstract class Indexer { 18 | 19 | static IndexWriter makeWriter(String path, Analyzer analyzer) throws IOException { 20 | Path fDir = Paths.get(path); 21 | File file = fDir.toFile(); 22 | 23 | if(file.exists()){ 24 | if(file.isFile()){ 25 | throw new IOException("Cannot open directory at "+path+" since its already a file."); 26 | } 27 | } else{ 28 | if(!file.mkdirs()){ 29 | throw new IOException("Cannot open directory at "+path+". 
Try create the directory manually."); 30 | } 31 | } 32 | 33 | Directory dir = FSDirectory.open(fDir); 34 | IndexWriterConfig iwc = new IndexWriterConfig(analyzer); 35 | iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); 36 | return new IndexWriter(dir, iwc); 37 | } 38 | 39 | static String getField(IndexSearcher searcher, String subject, String field) throws IOException { 40 | Term term = new Term(DataFields.SUBJECT.name(), subject); 41 | Query query = new TermQuery(term); 42 | TopDocs results = searcher.search(query, 1); 43 | if(results.totalHits < 1) { 44 | // System.err.println("WARN: Subject "+subject+" does not exist or was not indexed."); 45 | return null; 46 | } 47 | ScoreDoc[] hits = results.scoreDocs; 48 | Document doc = searcher.doc(hits[0].doc); 49 | return doc.get(field); 50 | } 51 | 52 | static String[] getFieldAll(IndexSearcher searcher, String subject, String field) throws IOException { 53 | Term term = new Term(DataFields.SUBJECT.name(), subject); 54 | Query query = new TermQuery(term); 55 | TopDocs results = searcher.search(query, 1); 56 | if(results.totalHits < 1) { 57 | // System.err.println("WARN: Subject "+subject+" does not exist or was not indexed."); 58 | return null; 59 | } 60 | ScoreDoc[] hits = results.scoreDocs; 61 | Document doc = searcher.doc(hits[0].doc); 62 | return doc.getValues(field); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/InstancesFields.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | public enum InstancesFields { 4 | ID, LABEL, ALT_LABEL, FREQUENCY, FREQ_STORED, RANK, PROPERTY 5 | } 6 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/PropertiesFields.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | enum 
PropertiesFields { 4 | P, LABEL, VALUES, OCCURRENCES, NUMBER, M_PRIME, TOP_VALUE, VALUES_CARD 5 | } 6 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/PropertiesStatistics.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.document.Document; 4 | import org.apache.lucene.index.DirectoryReader; 5 | import org.apache.lucene.index.IndexReader; 6 | import org.apache.lucene.store.FSDirectory; 7 | 8 | import java.io.FileWriter; 9 | import java.io.IOException; 10 | import java.io.PrintWriter; 11 | import java.nio.file.Paths; 12 | 13 | public class PropertiesStatistics { 14 | 15 | public static void main(String[] args) throws IOException { 16 | 17 | if(args.length != 2) { 18 | System.err.println("USAGE: Properties_Indexes_Folder Output_File"); 19 | System.exit(0); 20 | } 21 | String propDir = args[0]; 22 | String statisticsFile = args[1]; 23 | 24 | PrintWriter pw = new PrintWriter(new FileWriter(statisticsFile)); 25 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(propDir))); 26 | int size = reader.maxDoc(); 27 | 28 | pw.println("P,Q?,|s|,m',|o|"); 29 | 30 | for(int i = 0; i map = new HashMap<>(); 90 | System.err.println("Graph size: " + graphLength); 91 | 92 | // CREATE MAP TO TRANSLATE SUBJECT TO ID 93 | System.err.println("Creating map..."); 94 | for(int i=0; i outLinksList; 20 | private Map map; 21 | private Properties properties; 22 | 23 | RankHandler(int[][] graph, Map map) throws IOException { 24 | super(); 25 | last = null; 26 | outLinksList = null; 27 | read = 0; 28 | currentIndex = 0; 29 | this.graph = graph; 30 | this.map = map; 31 | properties = new Properties(); 32 | InputStream input = new FileInputStream("facet.properties"); 33 | properties.load(input); 34 | } 35 | 36 | @Override 37 | public void handleStatement(Statement s) { 38 | final int TICKS = 100000; 39 | 40 | 
final String entityIRI = properties.getProperty("entityIRI"); 41 | read++; 42 | if(read%TICKS==0) { 43 | System.err.println(read + " lines read..."); 44 | 45 | long allocatedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); 46 | long freeMemory = Runtime.getRuntime().maxMemory() - allocatedMemory; 47 | 48 | System.err.println("Free memory: " + freeMemory); 49 | } 50 | 51 | Resource subject = s.getSubject(); 52 | // FIRST LINE 53 | if(last == null) { 54 | last = subject; 55 | String name = last.toString(); 56 | name = name.replace(entityIRI, ""); 57 | if(map.containsKey(name)) { 58 | currentIndex = map.get(name); 59 | outLinksList = new TreeSet<>(); 60 | } else { 61 | outLinksList = null; 62 | } 63 | } 64 | // NEW SUBJECT 65 | if(!last.toString().equals(subject.toString())) { 66 | if(outLinksList != null && !outLinksList.isEmpty()) { 67 | graph[currentIndex] = outLinksList.stream().mapToInt(a -> a).toArray(); 68 | } 69 | 70 | last = subject; 71 | String name = last.toString(); 72 | name = name.replace(entityIRI, ""); 73 | if(map.containsKey(name)) { 74 | currentIndex = map.get(name); 75 | outLinksList = new TreeSet<>(); 76 | } else { 77 | outLinksList = null; 78 | } 79 | } 80 | // PROPERTIES 81 | if((s.getObject() instanceof Literal)) return; 82 | String object = s.getObject().toString(); 83 | String value = object.replace(entityIRI, ""); 84 | if(outLinksList != null && map.containsKey(value)) { 85 | int valueId = map.get(value); 86 | if(!outLinksList.contains(valueId)) 87 | outLinksList.add(valueId); 88 | } 89 | } 90 | 91 | void finish() { 92 | if(outLinksList != null && !outLinksList.isEmpty()) { 93 | graph[currentIndex] = outLinksList.stream().mapToInt(a -> a).toArray(); 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/ScoreBoostsOperator.java: -------------------------------------------------------------------------------- 1 | package 
/**
 * Turns a raw rank into a logarithmic boost: the natural logarithm of the rank
 * scaled by {@code 1 / MIN_RANK}, plus one.
 *
 * <p>A rank equal to {@code MIN_RANK} therefore maps to roughly 1. A rank of 0
 * yields negative infinity and a negative rank yields NaN.
 */
public class ScoreBoostsOperator implements DoubleUnaryOperator {

    /** Reference rank; presumably the smallest rank in the indexed data - TODO confirm. */
    private static final double MIN_RANK = 6.641535269908322E-9;

    /** Multiplier that rescales a rank relative to {@link #MIN_RANK}. */
    private static final double SCALE = 1d / MIN_RANK;

    /**
     * @param operand the raw rank
     * @return {@code Math.log(operand * SCALE) + 1}
     */
    @Override
    public double applyAsDouble(double operand) {
        return Math.log(operand * SCALE) + 1;
    }
}
DirectoryReader.open(FSDirectory.open(Paths.get(in))); 41 | // open a searcher over the reader 42 | IndexSearcher searcher = new IndexSearcher(reader); 43 | // use the same analyser as the build 44 | Analyzer analyzer = new StandardAnalyzer(); 45 | 46 | // this accepts queries/searches and parses them into 47 | // searches over the index 48 | HashMap boostsMap = new HashMap<>(); 49 | boostsMap.put(altLabelFieldName, 2f); 50 | boostsMap.put(descriptionFieldName, 1f); 51 | boostsMap.put(labelFieldName, 5f); 52 | 53 | MultiFieldQueryParser queryParser = new MultiFieldQueryParser( 54 | new String[] {labelFieldName, descriptionFieldName, altLabelFieldName}, 55 | analyzer, boostsMap); 56 | 57 | BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "utf-8")); 58 | 59 | while (true) { 60 | System.out.println(reader.numDocs() + " documents indexed."); 61 | System.out.println("Enter search code:"); 62 | System.out.println("0:subject 1:keyword 2:property 3:property-object 4:exit"); 63 | int opCode; 64 | try { 65 | opCode = Integer.parseInt(br.readLine()); 66 | } catch (Exception e) { 67 | continue; 68 | } 69 | if(opCode == 4) break; 70 | if(opCode > 4 || opCode < 0) continue; 71 | 72 | System.out.println("Enter a keyword search phrase:"); 73 | 74 | String line = br.readLine(); 75 | if(line!=null){ 76 | line = line.trim(); 77 | if(!line.isEmpty()){ 78 | try{ 79 | // parse query 80 | Query query; 81 | Term term; 82 | switch(opCode) { 83 | case 0: 84 | term = new Term(DataFields.SUBJECT.name(), line); 85 | query = new TermQuery(term); 86 | break; 87 | case 1: 88 | query = queryParser.parse(line); 89 | break; 90 | case 2: 91 | term = new Term(DataFields.PROPERTY.name(), line); 92 | query = new TermQuery(term); 93 | break; 94 | default: 95 | term = new Term(DataFields.PO.name(), line); 96 | query = new WildcardQuery(term); 97 | } 98 | 99 | // get hits 100 | TopDocs results = searcher.search(query, DOCS_PER_PAGE); 101 | ScoreDoc[] hits = results.scoreDocs; 102 | 
103 | System.out.println("Running query: "+line); 104 | System.out.println("Parsed query: "+query); 105 | System.out.println("Matching documents: "+results.totalHits); 106 | System.out.println("Showing top "+DOCS_PER_PAGE+" results"); 107 | System.out.println(); 108 | System.out.println(); 109 | 110 | for(ScoreDoc hit : hits) { 111 | Document doc = searcher.doc(hit.doc); 112 | String subject = doc.get(DataFields.SUBJECT.name()); 113 | String label = doc.get(labelFieldName); 114 | String desc = doc.get(descriptionFieldName); 115 | String alt = doc.get(altLabelFieldName); 116 | String image = doc.get(DataFields.IMAGE.name()); 117 | IndexableField[] instances = doc.getFields(DataFields.TYPE.name()); 118 | IndexableField[] pos = doc.getFields(DataFields.PO.name()); 119 | String boost = doc.get(DataFields.RANK_STORED.name()); 120 | System.out.println(subject + " - " + label); 121 | System.out.println(image); 122 | System.out.println(desc); 123 | System.out.println(alt); 124 | System.out.println("Instances:"); 125 | for(IndexableField instance : instances) { 126 | System.out.println("\t" + instance.stringValue()); 127 | } 128 | System.out.println("Property ## Value:"); 129 | for(IndexableField po : pos) { 130 | System.out.println("\t" + po.stringValue()); 131 | } 132 | System.out.println("Score: " + hit.score); 133 | System.out.println("PageRank: " + boost); 134 | System.out.println(); 135 | System.out.println("_______________________________________"); 136 | System.out.println(); 137 | } 138 | } catch(Exception e) { 139 | System.err.println("Error with query '"+line+"'"); 140 | e.printStackTrace(); 141 | } 142 | } 143 | } 144 | 145 | } 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/SearchInstances.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.analysis.Analyzer; 4 | import 
org.apache.lucene.analysis.standard.StandardAnalyzer; 5 | import org.apache.lucene.document.Document; 6 | import org.apache.lucene.index.DirectoryReader; 7 | import org.apache.lucene.index.IndexReader; 8 | import org.apache.lucene.index.Term; 9 | import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; 10 | import org.apache.lucene.search.*; 11 | import org.apache.lucene.store.FSDirectory; 12 | 13 | import java.io.BufferedReader; 14 | import java.io.IOException; 15 | import java.io.InputStreamReader; 16 | import java.nio.file.Paths; 17 | import java.util.HashMap; 18 | 19 | public class SearchInstances { 20 | 21 | public static void main(String[] args) throws IOException { 22 | System.out.println("SearchInstances"); 23 | System.out.println("Search instances from the data to get its Subject"); 24 | System.out.println(); 25 | 26 | if(args.length!=2) { 27 | System.err.println("USAGE: Instances_Indexes_Folder Language"); 28 | System.exit(0); 29 | } 30 | String insDir = args[0]; 31 | String lang = args[1]; 32 | final int DOCS_PER_PAGE = 10; 33 | 34 | String labelFieldName = InstancesFields.LABEL.name() + "-" + lang; 35 | String altLabelFieldName = InstancesFields.ALT_LABEL.name() + "-" + lang; 36 | 37 | // open a reader for the directory 38 | IndexReader insReader = DirectoryReader.open(FSDirectory.open(Paths.get(insDir))); 39 | // open a searcher over the reader 40 | IndexSearcher insSearcher = new IndexSearcher(insReader); 41 | 42 | Analyzer analyzer = new StandardAnalyzer(); 43 | BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "utf-8")); 44 | SortField sortField = new SortedNumericSortField(InstancesFields.FREQUENCY.name(), SortField.Type.LONG, true); 45 | Sort sort = new Sort(sortField); 46 | System.out.println("Total instances: " + insReader.numDocs()); 47 | 48 | HashMap boostsMap = new HashMap<>(); 49 | boostsMap.put(altLabelFieldName, 2f); 50 | boostsMap.put(labelFieldName, 5f); 51 | 52 | MultiFieldQueryParser queryParser = new 
MultiFieldQueryParser( 53 | new String[] {labelFieldName, altLabelFieldName}, 54 | analyzer, boostsMap); 55 | 56 | while (true) { 57 | System.out.println("Enter search code:"); 58 | System.out.println("0:subject 1:name 2:all 3:exit"); 59 | int opCode; 60 | try { 61 | opCode = Integer.parseInt(br.readLine()); 62 | } catch (Exception e) { 63 | continue; 64 | } 65 | if(opCode == 3) break; 66 | if(opCode > 3 || opCode < 0) continue; 67 | 68 | System.out.println("Enter a keyword search phrase:"); 69 | 70 | String line = br.readLine(); 71 | if(line != null) { 72 | line = line.trim(); 73 | if(!line.isEmpty()){ 74 | try{ 75 | // parse query 76 | Query query; 77 | TopDocs results; 78 | if(opCode == 0) { 79 | query = new WildcardQuery(new Term(InstancesFields.ID.name(), line)); 80 | } else if(opCode == 1) { 81 | query = queryParser.parse(line); 82 | } else { 83 | query = new MatchAllDocsQuery(); 84 | } 85 | 86 | // get hits 87 | results = insSearcher.search(query, DOCS_PER_PAGE, sort); 88 | ScoreDoc[] hits = results.scoreDocs; 89 | 90 | System.out.println("Running query: "+line); 91 | System.out.println("Parsed query: "+query); 92 | System.out.println("Matching documents: "+results.totalHits); 93 | System.out.println("Showing top "+DOCS_PER_PAGE+" results"); 94 | 95 | for(int i=0; i 2 || opCode < 0) continue; 53 | 54 | System.out.println("Enter a keyword search phrase:"); 55 | 56 | String line = br.readLine(); 57 | if(line != null) { 58 | line = line.trim(); 59 | if(!line.isEmpty()){ 60 | try{ 61 | // parse query 62 | Query query; 63 | TopDocs results; 64 | if(opCode == 0) { 65 | query = new TermQuery(new Term(PropertiesFields.P.name(), line)); 66 | } else { 67 | query = new QueryParser(PropertiesFields.LABEL.name(), analyzer).parse(line); 68 | } 69 | 70 | // get hits 71 | results = propSearcher.search(query, DOCS_PER_PAGE, sort); 72 | ScoreDoc[] hits = results.scoreDocs; 73 | 74 | System.out.println("Running query: "+line); 75 | System.out.println("Parsed query: "+query); 
76 | System.out.println("Matching documents: "+results.totalHits); 77 | System.out.println("Showing top "+DOCS_PER_PAGE+" results"); 78 | 79 | for(ScoreDoc hit : hits) { 80 | String subject; 81 | String label; 82 | String occurrences; 83 | Document doc = propSearcher.doc(hit.doc); 84 | subject = doc.get(PropertiesFields.P.name()); 85 | label = doc.get(PropertiesFields.LABEL.name()); 86 | occurrences = doc.get(PropertiesFields.NUMBER.name()); 87 | String mPrime = doc.get(PropertiesFields.M_PRIME.name()); 88 | System.out.println(subject+"\t"+label+"\t"+occurrences+"\t"+mPrime); 89 | System.out.println("_______________________________________"); 90 | System.out.println(); 91 | } 92 | } catch(Exception e) { 93 | System.err.println("Error with query '"+line+"'"); 94 | e.printStackTrace(); 95 | } 96 | } 97 | } 98 | 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/core/SearchValues.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.core; 2 | 3 | import org.apache.lucene.document.Document; 4 | import org.apache.lucene.index.DirectoryReader; 5 | import org.apache.lucene.index.IndexReader; 6 | import org.apache.lucene.index.Term; 7 | import org.apache.lucene.search.*; 8 | import org.apache.lucene.store.FSDirectory; 9 | 10 | import java.io.BufferedReader; 11 | import java.io.IOException; 12 | import java.io.InputStreamReader; 13 | import java.nio.file.Paths; 14 | 15 | public class SearchValues { 16 | 17 | public static void main(String[] args) throws IOException { 18 | System.out.println("SearchValues"); 19 | System.out.println("Search values for the properties from the data"); 20 | System.out.println(); 21 | 22 | if(args.length!=1) { 23 | System.err.println("USAGE: Values_Index"); 24 | System.exit(0); 25 | } 26 | String valDir = args[0]; 27 | final int DOCS_PER_PAGE = 10; 28 | 29 | // open a reader for the directory 
30 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(valDir))); 31 | // open a searcher over the reader 32 | IndexSearcher searcher = new IndexSearcher(reader); 33 | 34 | BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "utf-8")); 35 | System.out.println("Total instances: " + reader.numDocs()); 36 | 37 | while (true) { 38 | System.out.println("Enter operation:"); 39 | System.out.println("0:search 1:exit"); 40 | int opCode; 41 | try { 42 | opCode = Integer.parseInt(br.readLine()); 43 | } catch (Exception e) { 44 | continue; 45 | } 46 | if(opCode == 1) break; 47 | if(opCode != 0) continue; 48 | 49 | System.out.println("Enter a regex to search:"); 50 | 51 | String line = br.readLine(); 52 | if(line != null) { 53 | line = line.trim(); 54 | if(!line.isEmpty()){ 55 | try{ 56 | // parse query 57 | Query query = new WildcardQuery(new Term(ValuesFields.BASE.name(), line)); 58 | // get hits 59 | TopDocs results = searcher.search(query, DOCS_PER_PAGE); 60 | ScoreDoc[] hits = results.scoreDocs; 61 | 62 | System.out.println("Running query: "+line); 63 | System.out.println("Parsed query: "+query); 64 | System.out.println("Matching documents: "+results.totalHits); 65 | System.out.println("Showing top "+DOCS_PER_PAGE+" results"); 66 | 67 | for(int i=0; i max) { 66 | max = rank; 67 | } 68 | 69 | Document oldDocument = reader.document(doc); 70 | String subject = oldDocument.get(DataFields.SUBJECT.name()); 71 | String image = oldDocument.get(DataFields.IMAGE.name()); 72 | String[] instances = oldDocument.getValues(DataFields.TYPE.name()); 73 | String[] ps = oldDocument.getValues(DataFields.PROPERTY.name()); 74 | String[] pos = oldDocument.getValues(DataFields.PO.name()); 75 | 76 | Document newDocument = new Document(); 77 | Field subjectField = new StringField(DataFields.SUBJECT.name(), subject, Field.Store.YES); 78 | newDocument.add(subjectField); 79 | if(image != null) { 80 | Field imageField = new StringField(DataFields.IMAGE.name(), 
image, Field.Store.YES); 81 | newDocument.add(imageField); 82 | } 83 | for(String instance : instances) { 84 | Field instanceField = new StringField(DataFields.TYPE.name(), instance, Field.Store.YES); 85 | newDocument.add(instanceField); 86 | } 87 | for(String p : ps) { 88 | Field pField = new StringField(DataFields.PROPERTY.name(), p, Field.Store.YES); 89 | newDocument.add(pField); 90 | } 91 | for(String po : pos) { 92 | Field poField = new StringField(DataFields.PO.name(), po, Field.Store.YES); 93 | newDocument.add(poField); 94 | } 95 | Field boostsField = new DoubleDocValuesField(DataFields.RANK.name(), rank); 96 | newDocument.add(boostsField); 97 | Field storedField = new StoredField(DataFields.RANK_STORED.name(), rank); 98 | newDocument.add(storedField); 99 | 100 | // LABELS 101 | for(String lang : languages) { 102 | String labelFieldName = DataFields.LABEL.name() + "-" + lang; 103 | String altLabelFieldName = DataFields.ALT_LABEL.name() + "-" + lang; 104 | String descriptionFieldName = DataFields.DESCRIPTION.name() + "-" + lang; 105 | 106 | String[] labels = oldDocument.getValues(labelFieldName); 107 | String[] altLabels = oldDocument.getValues(altLabelFieldName); 108 | String[] descriptions = oldDocument.getValues(descriptionFieldName); 109 | 110 | for(String label: labels) { 111 | Field labelField = new TextField(labelFieldName, label, Field.Store.YES); 112 | newDocument.add(labelField); 113 | } 114 | for(String altLabel: altLabels) { 115 | Field altLabelField = new TextField(altLabelFieldName, altLabel, Field.Store.YES); 116 | newDocument.add(altLabelField); 117 | } 118 | for(String description : descriptions) { 119 | Field descriptionField = new TextField(descriptionFieldName, description, Field.Store.YES); 120 | newDocument.add(descriptionField); 121 | } 122 | } 123 | 124 | writer.addDocument(newDocument); 125 | read++; 126 | } 127 | 128 | writer.close(); 129 | 130 | long totalTime = System.currentTimeMillis() - startTime; 131 | System.err.println("Total 
/**
 * One row of the search benchmark output: the URL that was requested and the
 * measurements recorded for it.
 *
 * <p>Instances are immutable. The column order written by
 * {@link #printAsCsv(PrintStream)} is
 * {@code URL,time,results,depth,properties,size}.
 */
class DataEntry {
    private final String URL;
    private final long time;
    private final int results;
    private final int depth;
    private final int properties;
    private final int size;

    /**
     * @param URL        requested URL
     * @param time       time measured for the request
     * @param results    number of results reported for the request
     * @param depth      depth of the navigation step that produced this row
     * @param properties number of properties available at this step
     * @param size       size of the response
     */
    DataEntry(String URL, long time, int results, int depth, int properties, int size) {
        this.URL = URL;
        this.time = time;
        this.results = results;
        this.depth = depth;
        this.properties = properties;
        this.size = size;
    }

    /**
     * Writes this entry as a single CSV line terminated by the platform line separator.
     *
     * @param ps stream to write to
     */
    void printAsCsv(PrintStream ps) {
        ps.print(csvField(URL));
        ps.print(",");
        ps.print(time);
        ps.print(",");
        ps.print(results);
        ps.print(",");
        ps.print(depth);
        ps.print(",");
        ps.print(properties);
        ps.print(",");
        ps.print(size);
        ps.println();
    }

    /**
     * Quotes a text field when it contains a comma, a double quote or a line break,
     * doubling embedded quotes, so the row keeps exactly six columns. Values without
     * those characters are returned unchanged.
     */
    private static String csvField(String value) {
        // String.valueOf keeps the historical "null" rendering of a null URL.
        String text = String.valueOf(value);
        boolean needsQuoting = text.indexOf(',') >= 0
                || text.indexOf('"') >= 0
                || text.indexOf('\n') >= 0
                || text.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return text;
        }
        return "\"" + text.replace("\"", "\"\"") + "\"";
    }

}
long time; 11 | 12 | PropertyEntry(String URL, int values, String selected, long time) { 13 | this.URL = URL; 14 | this.values = values; 15 | this.selected = selected; 16 | this.time = time; 17 | } 18 | 19 | void printAsCsv(PrintStream ps) { 20 | ps.print(URL); 21 | ps.print(","); 22 | ps.print(values); 23 | ps.print(","); 24 | ps.print(selected); 25 | ps.print(","); 26 | ps.print(time); 27 | ps.println(); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/testing/RandomSearch.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.testing; 2 | 3 | import java.io.FileOutputStream; 4 | import java.io.IOException; 5 | import java.io.PrintStream; 6 | 7 | public class RandomSearch { 8 | 9 | private static final int TIMES = 10; 10 | 11 | public static void main(String[] args) throws InterruptedException, IOException { 12 | 13 | Thread[] threads = new Thread[TIMES]; 14 | SearchStats ts = new SearchStats(); 15 | 16 | for(int i = 0; i < TIMES; i++) { 17 | threads[i] = new SearchThread(ts); 18 | threads[i].setName("T"+i); 19 | } 20 | for(Thread t : threads) { 21 | t.start(); 22 | //t.join(); 23 | } 24 | 25 | for(Thread t : threads) 26 | t.join(); 27 | 28 | PrintStream dataStream = new PrintStream(new FileOutputStream("data.csv")); 29 | PrintStream propStream = new PrintStream(new FileOutputStream("property.csv")); 30 | 31 | System.out.println("Search times:"); 32 | ts.printDataStats(System.out); 33 | System.out.println("Properties times:"); 34 | ts.printPropertiesStats(System.out); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/testing/RandomWeightList.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.testing; 2 | 3 | import java.util.*; 4 | import java.util.concurrent.ThreadLocalRandom; 5 | 6 | 
/**
 * A collection from which elements are drawn at random, each with probability
 * proportional to the weight it was added with.
 *
 * <p>Elements are stored in a sorted map keyed by the running total of weights;
 * drawing picks a uniform point in {@code [0, total)} and returns the element
 * whose cumulative key is the first one strictly above that point.
 *
 * <p>Not thread-safe.
 *
 * @param <E> element type
 */
class RandomWeightList<E> {

    /** Cumulative weight (after adding the element) mapped to the element. */
    private final NavigableMap<Double, E> map;
    private double total;

    RandomWeightList() {
        map = new TreeMap<>();
        total = 0;
    }

    /**
     * Adds an element with the given selection weight.
     *
     * @param element element to add
     * @param weight  relative weight; must be positive and finite
     * @throws IllegalArgumentException if {@code weight} is zero, negative, NaN or infinite
     */
    void add(E element, double weight) {
        // A zero weight would reuse the previous cumulative key and silently replace the
        // previous element; a negative or NaN weight would corrupt the running total.
        if (!(weight > 0) || Double.isInfinite(weight)) {
            throw new IllegalArgumentException("weight must be positive and finite: " + weight);
        }
        total += weight;
        map.put(total, element);
    }

    /**
     * Draws one element at random according to the weights.
     *
     * @return the selected element
     * @throws NoSuchElementException if no element has been added
     */
    E nextElement() {
        if (map.isEmpty()) {
            throw new NoSuchElementException("no elements to choose from");
        }
        // Obtain the generator at the point of use: a ThreadLocalRandom instance must not
        // be cached in a field that another thread could later read.
        double key = ThreadLocalRandom.current().nextDouble() * total;
        Map.Entry<Double, E> result = map.higherEntry(key);
        if (result == null) {
            // Floating-point rounding can make key == total; fall back to the last slot.
            result = map.lastEntry();
        }
        return result.getValue();
    }

    /** @return number of distinct cumulative slots, i.e. elements added */
    int size() {
        return map.size();
    }
}
org.jsoup.Connection; 4 | import org.jsoup.Jsoup; 5 | import org.jsoup.nodes.Document; 6 | import org.jsoup.nodes.Element; 7 | import org.jsoup.select.Elements; 8 | 9 | import javax.json.Json; 10 | import javax.json.JsonArray; 11 | import javax.json.JsonObject; 12 | import javax.json.JsonReader; 13 | import java.io.InputStream; 14 | import java.net.URL; 15 | import java.net.URLEncoder; 16 | import java.util.ArrayList; 17 | import java.util.List; 18 | import java.util.Random; 19 | import java.util.concurrent.ThreadLocalRandom; 20 | 21 | class SearchThread extends Thread { 22 | 23 | private static final String baseURL = "http://facet.dcc.uchile.cl/testfacet/"; 24 | private static final Object mutex = new Object(); 25 | private SearchStats stats; 26 | 27 | SearchThread(SearchStats ts) { 28 | stats = ts; 29 | } 30 | 31 | @Override 32 | public void run() { 33 | List currentProperties = new ArrayList<>(); 34 | String instance = "Q5"; 35 | Random random = ThreadLocalRandom.current(); 36 | 37 | int results; 38 | int depth = 0; 39 | try { 40 | do { 41 | // Construct url 42 | StringBuilder urlBuilder = new StringBuilder(); 43 | urlBuilder.append(baseURL); 44 | urlBuilder.append("search?instance="); 45 | urlBuilder.append(instance); 46 | for (String property : currentProperties) { 47 | urlBuilder.append("&properties="); 48 | urlBuilder.append(URLEncoder.encode(property, "UTF-8")); 49 | } 50 | String url = urlBuilder.toString(); 51 | 52 | Document resultsPage; 53 | int size; 54 | long start, end; 55 | 56 | //synchronized(mutex) { 57 | start = System.currentTimeMillis(); 58 | Connection connection = Jsoup.connect(url).timeout(60 * 1000); 59 | connection.get(); 60 | Connection.Response response = connection.response(); 61 | size = response.bodyAsBytes().length; 62 | resultsPage = response.parse(); 63 | end = System.currentTimeMillis(); 64 | //} 65 | long time = end - start; 66 | 67 | Element resultsInfo = resultsPage.select("div.col-md-9").first().child(0); 68 | results = 
Integer.parseInt(resultsInfo.text().replaceAll("[^0-9]", "")); 69 | 70 | if (results < 2) { 71 | DataEntry currentStep = new DataEntry(url, time, results, depth, 0, size); 72 | stats.addDataSearch(currentStep); 73 | break; 74 | } 75 | Elements possibleProperties = resultsPage.select("button[type=button][value^=P]"); 76 | 77 | RandomWeightList availableProperties = new RandomWeightList<>(); 78 | for (int j = 0; j < possibleProperties.size(); j++) { 79 | Element element = possibleProperties.get(j); 80 | String code = element.attr("value"); 81 | if (code.contains("##")) continue; 82 | double weight = possibleProperties.size() - j; 83 | availableProperties.add(code, weight); 84 | } 85 | String selectedProperty = availableProperties.nextElement(); 86 | 87 | DataEntry currentStep = new DataEntry(url, time, results, depth, availableProperties.size(), size); 88 | stats.addDataSearch(currentStep); 89 | 90 | urlBuilder = new StringBuilder(); 91 | urlBuilder.append(baseURL); 92 | urlBuilder.append("properties?instance="); 93 | urlBuilder.append(instance); 94 | for (String property : currentProperties) { 95 | urlBuilder.append("&selected="); 96 | urlBuilder.append(URLEncoder.encode(property, "UTF-8")); 97 | } 98 | urlBuilder.append("&property="); 99 | urlBuilder.append(selectedProperty); 100 | String propertiesUrl = urlBuilder.toString(); 101 | 102 | JsonArray jsonResponse; 103 | //synchronized(mutex) { 104 | start = System.currentTimeMillis(); 105 | InputStream stream = new URL(propertiesUrl).openStream(); 106 | JsonReader reader = Json.createReader(stream); 107 | jsonResponse = reader.readArray(); 108 | end = System.currentTimeMillis(); 109 | //} 110 | time = end - start; 111 | 112 | int index = random.nextInt(jsonResponse.size()); 113 | JsonObject selectedPo = jsonResponse.getJsonObject(index); 114 | String poCode = selectedPo.getString("id"); 115 | 116 | PropertyEntry propertyEntry = new PropertyEntry(propertiesUrl, jsonResponse.size(), poCode, time); 117 | 
stats.addPropertySearch(propertyEntry); 118 | currentProperties.add(poCode); 119 | depth++; 120 | } while (results > 1); 121 | } catch (Exception e) { 122 | System.err.println(this.getName() + " throws an exception"); 123 | e.printStackTrace(); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/web/ApiServlet.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.web; 2 | 3 | import cl.uchile.dcc.facet.core.DataFields; 4 | import org.apache.lucene.document.Document; 5 | import org.apache.lucene.index.IndexableField; 6 | import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; 7 | import org.apache.lucene.search.Query; 8 | import org.apache.lucene.search.ScoreDoc; 9 | import org.apache.lucene.search.TopDocs; 10 | 11 | import javax.json.Json; 12 | import javax.json.JsonArrayBuilder; 13 | import javax.json.JsonBuilderFactory; 14 | import javax.json.JsonObjectBuilder; 15 | import javax.servlet.http.HttpServletRequest; 16 | import javax.servlet.http.HttpServletResponse; 17 | import java.io.IOException; 18 | import java.io.PrintWriter; 19 | 20 | public class ApiServlet extends DataServlet { 21 | /* 22 | @Override 23 | public void doPost(HttpServletRequest request, HttpServletResponse response) throws IOException { 24 | response.setContentType("text/html; charset=UTF-8"); 25 | PrintWriter out = response.getWriter(); 26 | try { 27 | String queryText = request.getParameter("query"); 28 | StandardQueryParser queryParser = new StandardQueryParser(); 29 | Query query = queryParser.parse(queryText, DataFields.SUBJECT.name()); 30 | 31 | TopDocs results = dataSearcher.search(query, dataReader.numDocs()); 32 | ScoreDoc[] hits = results.scoreDocs; 33 | JsonBuilderFactory factory = Json.createBuilderFactory(null); 34 | JsonArrayBuilder resultsArray = factory.createArrayBuilder(); 35 | 36 | for(ScoreDoc hit : hits) { 
37 | Document document = dataSearcher.doc(hit.doc); 38 | JsonObjectBuilder hitObject = factory.createObjectBuilder(); 39 | 40 | String id = document.get(DataFields.SUBJECT.name()); 41 | hitObject.add("id", id); 42 | 43 | String label = document.get(DataFields.LABEL.name()); 44 | label = label == null ? "" : label; 45 | hitObject.add("label", label); 46 | 47 | String altLabel = document.get(DataFields.ALT_LABEL.name()); 48 | altLabel = altLabel == null ? "" : altLabel; 49 | hitObject.add("alt labels", altLabel); 50 | 51 | String description = document.get(DataFields.DESCRIPTION.name()); 52 | description = description == null ? "" : description; 53 | hitObject.add("description", description); 54 | 55 | String rank = document.get(DataFields.VALUE.name()); 56 | hitObject.add("rank", Double.parseDouble(rank)); 57 | 58 | String type = document.get(DataFields.INSTANCE.name()); 59 | type = type == null ? "" : type; 60 | hitObject.add("type", type); 61 | 62 | JsonArrayBuilder pArray = factory.createArrayBuilder(); 63 | IndexableField[] ps = document.getFields(DataFields.P.name()); 64 | for(IndexableField p : ps) { 65 | pArray.add(p.stringValue()); 66 | } 67 | hitObject.add("properties", pArray); 68 | 69 | JsonArrayBuilder poArray = factory.createArrayBuilder(); 70 | IndexableField[] pos = document.getFields(DataFields.PO.name()); 71 | for(IndexableField po : pos) { 72 | poArray.add(po.stringValue()); 73 | } 74 | hitObject.add("property value", poArray); 75 | 76 | resultsArray.add(hitObject); 77 | } 78 | 79 | out.print(resultsArray.build().toString()); 80 | } catch(Exception e) { 81 | e.printStackTrace(out); 82 | } 83 | }*/ 84 | } 85 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/web/CodeNameValue.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.web; 2 | 3 | public class CodeNameValue { 4 | private String code; 5 | private String name; 6 | 
private int value; 7 | 8 | CodeNameValue(String code, String name, int value) { 9 | this.code = code; 10 | this.name = name; 11 | this.value = value; 12 | } 13 | 14 | public String getCode() { 15 | return code; 16 | } 17 | 18 | public String getName() { 19 | return name; 20 | } 21 | 22 | public int getValue() { 23 | return value; 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/web/DataServlet.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.web; 2 | 3 | import cl.uchile.dcc.facet.core.DataFields; 4 | import org.apache.lucene.document.Document; 5 | import org.apache.lucene.index.DirectoryReader; 6 | import org.apache.lucene.index.IndexReader; 7 | import org.apache.lucene.index.Term; 8 | import org.apache.lucene.search.IndexSearcher; 9 | import org.apache.lucene.search.TermQuery; 10 | import org.apache.lucene.search.TopDocs; 11 | import org.apache.lucene.store.FSDirectory; 12 | 13 | import javax.servlet.ServletException; 14 | import javax.servlet.http.HttpServlet; 15 | import java.io.IOException; 16 | import java.nio.file.Paths; 17 | 18 | public abstract class DataServlet extends HttpServlet { 19 | 20 | IndexSearcher searcher; 21 | IndexReader reader; 22 | 23 | String getLabelFromSubject(String subject, String lang) throws IOException { 24 | TopDocs result = searcher.search(new TermQuery(new Term(DataFields.SUBJECT.name(), subject)), 1); 25 | if(result.totalHits < 1) { 26 | return subject; 27 | } 28 | Document doc = searcher.doc(result.scoreDocs[0].doc); 29 | String label = doc.get(DataFields.LABEL.name() +"-"+ lang); 30 | if(label == null) label = subject; 31 | return label; 32 | } 33 | 34 | @Override 35 | public void init() throws ServletException { 36 | try { 37 | String LuceneDir = getServletContext().getInitParameter("IndexDirectory"); 38 | // open a reader for the directory 39 | reader = 
DirectoryReader.open(FSDirectory.open(Paths.get(LuceneDir))); 40 | // open a searcher over the reader 41 | searcher = new IndexSearcher(reader); 42 | } catch (IOException e) { 43 | System.err.println("FATAL: Cannot open Lucene folder"); 44 | throw new ServletException(); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/web/Entry.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.web; 2 | 3 | public class Entry { 4 | 5 | private String subject; 6 | private String label; 7 | private String description; 8 | private String altLabels; 9 | private String boosts; 10 | private String image; 11 | 12 | Entry(String subject, String label, String description, String altLabels, String boosts, String image) { 13 | this.subject = subject; 14 | this.label = label; 15 | this.description = description; 16 | this.altLabels = altLabels; 17 | this.boosts = boosts; 18 | this.image = image; 19 | } 20 | 21 | public String getSubject() { 22 | return subject; 23 | } 24 | 25 | public String getLabel() { 26 | return label; 27 | } 28 | 29 | public String getAltLabels() { 30 | return altLabels; 31 | } 32 | 33 | public String getDescription() { 34 | return description; 35 | } 36 | 37 | public String getBoosts() { return boosts; } 38 | 39 | public String getImage() { return image; } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/web/InstancesServlet.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.web; 2 | 3 | import cl.uchile.dcc.facet.core.InstancesFields; 4 | import org.apache.lucene.analysis.Analyzer; 5 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 6 | import org.apache.lucene.document.Document; 7 | import org.apache.lucene.index.DirectoryReader; 8 | import 
org.apache.lucene.index.IndexReader; 9 | import org.apache.lucene.index.IndexableField; 10 | import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; 11 | import org.apache.lucene.search.*; 12 | import org.apache.lucene.store.FSDirectory; 13 | 14 | import javax.servlet.ServletException; 15 | import javax.servlet.http.HttpServlet; 16 | import javax.servlet.http.HttpServletRequest; 17 | import javax.servlet.http.HttpServletResponse; 18 | import java.io.IOException; 19 | import java.io.PrintWriter; 20 | import java.nio.file.Paths; 21 | import java.util.HashMap; 22 | 23 | public class InstancesServlet extends HttpServlet { 24 | 25 | private static final int DOCS_PER_PAGE = 15; 26 | private IndexSearcher searcher; 27 | 28 | @Override 29 | public void init() throws ServletException { 30 | try { 31 | String instancesDir = getServletContext().getInitParameter("InstancesDirectory"); 32 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(instancesDir))); 33 | // open a searcher over the reader 34 | searcher = new IndexSearcher(reader); 35 | } catch (IOException e) { 36 | System.err.println("FATAL: Cannot open Lucene folder"); 37 | throw new ServletException(); 38 | } 39 | } 40 | 41 | @Override 42 | public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException { 43 | response.setContentType("text/html; charset=UTF-8"); 44 | PrintWriter out = response.getWriter(); 45 | 46 | String lang = request.getParameter("lang"); 47 | if(lang == null) lang = "en"; 48 | String labelFieldName = InstancesFields.LABEL.name() + "-" + lang; 49 | String altLabelFieldName = InstancesFields.ALT_LABEL.name() + "-" + lang; 50 | 51 | try { 52 | String keyword = request.getParameter("keyword"); 53 | if(keyword == null) { 54 | out.println("No keyword provided"); 55 | return; 56 | } 57 | 58 | Analyzer analyzer = new StandardAnalyzer(); 59 | HashMap boostsMap = new HashMap<>(); 60 | boostsMap.put(altLabelFieldName, 1f); 61 | 
boostsMap.put(labelFieldName, 1f); 62 | 63 | MultiFieldQueryParser queryParser = new MultiFieldQueryParser( 64 | new String[] {labelFieldName, altLabelFieldName}, 65 | analyzer, boostsMap); 66 | 67 | BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); 68 | Query baseAutoCompleteQuery = null; 69 | if(keyword.matches("[A-Za-z ]*")) { 70 | keyword = keyword.trim(); 71 | baseAutoCompleteQuery = queryParser.parse(keyword + "*"); 72 | } 73 | Query baseLiteralQuery = queryParser.parse(keyword); 74 | /* 75 | DoubleValuesSource boostsSource = DoubleValuesSource.fromDoubleField(InstancesFields.RANK.name()); 76 | boostsSource = DoubleValuesSource.function(boostsSource, new ScoreBoostsOperator()); 77 | boostsSource = DoubleValuesSource.scoringFunction(boostsSource, (Double src, Double score) -> src*score); 78 | */ 79 | if(baseAutoCompleteQuery != null) 80 | queryBuilder.add(baseAutoCompleteQuery, BooleanClause.Occur.SHOULD); 81 | queryBuilder.add(baseLiteralQuery, BooleanClause.Occur.SHOULD); 82 | Query query = queryBuilder.build(); 83 | 84 | Sort sorting = new Sort(new SortField(InstancesFields.RANK.name(), SortField.Type.DOUBLE, true)); 85 | 86 | TopDocs results = searcher.search(query, DOCS_PER_PAGE, sorting); 87 | ScoreDoc[] hits = results.scoreDocs; 88 | if(hits.length == 0) throw new Exception(); 89 | 90 | for(ScoreDoc hit : hits) { 91 | Document doc = searcher.doc(hit.doc); 92 | String label = doc.get(labelFieldName); 93 | String q = doc.get(InstancesFields.ID.name()); 94 | if(label==null) label = q; 95 | String occurrences = doc.get(InstancesFields.FREQ_STORED.name()); 96 | if(label.contains(keyword)) { 97 | out.print(""); 104 | continue; 105 | } 106 | IndexableField[] altLabels = doc.getFields(altLabelFieldName); 107 | for(IndexableField altLabel : altLabels) { 108 | String name = altLabel.stringValue(); 109 | if(name.contains(keyword)) { 110 | out.print(""); 117 | break; 118 | } 119 | } 120 | } 121 | } catch (Exception e) { 122 | //e.printStackTrace(out); 
123 | response.setStatus(HttpServletResponse.SC_NO_CONTENT); 124 | } 125 | } 126 | 127 | } 128 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/web/PropertiesServlet.java: -------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.web; 2 | 3 | import cl.uchile.dcc.facet.core.DataFields; 4 | import cl.uchile.dcc.facet.core.ValuesFields; 5 | import org.apache.lucene.analysis.Analyzer; 6 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 7 | import org.apache.lucene.document.Document; 8 | import org.apache.lucene.index.DirectoryReader; 9 | import org.apache.lucene.index.IndexReader; 10 | import org.apache.lucene.index.IndexableField; 11 | import org.apache.lucene.index.Term; 12 | import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; 13 | import org.apache.lucene.search.*; 14 | import org.apache.lucene.store.FSDirectory; 15 | 16 | import javax.servlet.ServletException; 17 | import javax.servlet.http.HttpServletRequest; 18 | import javax.servlet.http.HttpServletResponse; 19 | import javax.json.*; 20 | import java.io.IOException; 21 | import java.io.PrintWriter; 22 | import java.io.StringReader; 23 | import java.nio.file.Paths; 24 | import java.util.*; 25 | 26 | public class PropertiesServlet extends DataServlet { 27 | 28 | private static final int DOCS_PER_PAGE = 50000; 29 | private IndexReader valuesReader; 30 | private IndexSearcher valuesSearcher; 31 | 32 | private String processCache(String rawJson, String lang) { 33 | JsonReader jsonReader = Json.createReader(new StringReader(rawJson)); 34 | JsonArray inArray = jsonReader.readArray(); 35 | JsonBuilderFactory factory = Json.createBuilderFactory(null); 36 | JsonArrayBuilder outArray = factory.createArrayBuilder(); 37 | 38 | for(JsonValue value : inArray) { 39 | if(!value.getValueType().equals(JsonValue.ValueType.OBJECT)) continue; 40 | JsonObject inEntry = (JsonObject) value; 41 | 
String id = inEntry.getJsonString("id").getString(); 42 | String name = inEntry.getJsonString("name-"+lang).getString(); 43 | JsonObjectBuilder outEntry = factory.createObjectBuilder(); 44 | outEntry.add("id", id); 45 | outEntry.add("name", name); 46 | outArray.add(outEntry); 47 | } 48 | 49 | return outArray.build().toString(); 50 | } 51 | 52 | @Override 53 | public void init() throws ServletException { 54 | super.init(); 55 | try{ 56 | String valuesDirectory = getServletContext().getInitParameter("ValuesDirectory"); 57 | valuesReader = DirectoryReader.open(FSDirectory.open(Paths.get(valuesDirectory))); 58 | valuesSearcher = new IndexSearcher(valuesReader); 59 | // Magic is everything! - Reinhardt 60 | } catch(IOException ioe) { 61 | System.err.println("FATAL: Cannot open Properties Lucene folder"); 62 | throw new ServletException(); 63 | } 64 | } 65 | 66 | @Override 67 | public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException { 68 | response.setContentType("text/html; charset=UTF-8"); 69 | PrintWriter out = response.getWriter(); 70 | try { 71 | boolean hasProperties = false; 72 | Map values = new HashMap<>(); 73 | 74 | String lang = request.getParameter("lang"); 75 | if(lang == null) lang = "en"; 76 | String keyword = request.getParameter("keyword"); 77 | String instance = request.getParameter("instance"); 78 | String property = request.getParameter("property"); 79 | String[] selected = request.getParameterValues("selected"); 80 | 81 | if(selected != null) { 82 | Arrays.sort(selected); 83 | hasProperties = true; 84 | } 85 | 86 | boolean hasCache = false; 87 | Document valuesDoc = null; 88 | if(instance != null && hasProperties) { 89 | Query valuesQuery = new WildcardQuery(new Term(ValuesFields.BASE.name(), instance + "||*||" + property)); 90 | TopDocs valuesResults = valuesSearcher.search(valuesQuery, valuesReader.numDocs()); 91 | ScoreDoc[] hits = valuesResults.scoreDocs; 92 | for(ScoreDoc hit : hits) { 93 | Document doc = 
valuesSearcher.doc(hit.doc); 94 | String code = doc.get(ValuesFields.BASE.name()); 95 | String[] split = code.split("\\|\\|"); 96 | String[] resultsProperties = Arrays.copyOfRange(split, 1, split.length-1); 97 | Arrays.sort(resultsProperties); 98 | if(Arrays.equals(selected, resultsProperties)) { 99 | System.err.println("Loading values from cache for: " + code); 100 | hasCache = true; 101 | valuesDoc = doc; 102 | } 103 | } 104 | } else if(instance != null) { 105 | Query valuesQuery = new TermQuery(new Term(ValuesFields.BASE.name(), instance + "||" + property)); 106 | TopDocs valuesResults = valuesSearcher.search(valuesQuery, 1); 107 | if(valuesResults.totalHits > 0) { 108 | valuesDoc = valuesSearcher.doc(valuesResults.scoreDocs[0].doc); 109 | hasCache = true; 110 | System.err.println("Loading values from cache for: " + instance + "||" + property); 111 | } 112 | } 113 | 114 | if(hasCache) { 115 | String jsonFromDoc = valuesDoc.get(ValuesFields.VALUES.name()); 116 | String responseJson = processCache(jsonFromDoc, lang); 117 | out.print(responseJson); 118 | return; 119 | } else { 120 | System.err.println("Not cache found for values"); 121 | 122 | Analyzer analyzer = new StandardAnalyzer(); 123 | String labelFieldName = DataFields.LABEL.name() + "-" + lang; 124 | String altLabelFieldName = DataFields.ALT_LABEL.name() + "-" + lang; 125 | String descriptionFieldName = DataFields.DESCRIPTION.name() + "-" + lang; 126 | 127 | HashMap boostsMap = new HashMap<>(); 128 | boostsMap.put(altLabelFieldName, 2f); 129 | boostsMap.put(descriptionFieldName, 1f); 130 | boostsMap.put(labelFieldName, 5f); 131 | 132 | MultiFieldQueryParser queryParser = new MultiFieldQueryParser( 133 | new String[] {labelFieldName, descriptionFieldName, altLabelFieldName}, 134 | analyzer, boostsMap); 135 | 136 | List queries = new ArrayList<>(); 137 | if(keyword != null && !keyword.trim().isEmpty()) 138 | queries.add(queryParser.parse(keyword)); 139 | if(instance != null && !instance.trim().isEmpty()) 140 
| queries.add(new TermQuery(new Term(DataFields.TYPE.name(), instance))); 141 | if(selected != null && selected.length > 0) { 142 | for (String filter : selected) { 143 | if(filter.isEmpty()) continue; 144 | queries.add(new TermQuery(new Term(DataFields.PO.name(), filter))); 145 | } 146 | } 147 | queries.add(new TermQuery(new Term(DataFields.PROPERTY.name(), property))); 148 | 149 | BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); 150 | for(Query query : queries) 151 | queryBuilder.add(query, BooleanClause.Occur.MUST); 152 | Query query = queryBuilder.build(); 153 | 154 | TopDocs results = searcher.search(query, DOCS_PER_PAGE); 155 | 156 | ScoreDoc[] hits = results.scoreDocs; 157 | for(ScoreDoc hit : hits) { 158 | String subjectPrefix = getServletContext().getInitParameter("SubjectPrefix"); 159 | 160 | Document doc = searcher.doc(hit.doc); 161 | IndexableField[] pos = doc.getFields(DataFields.PO.name()); 162 | for(IndexableField po : pos) { 163 | String[] raw = po.stringValue().split("##"); 164 | if (!property.equals(raw[0])) continue; 165 | String code = raw[1]; 166 | if(!code.startsWith(subjectPrefix)) continue; 167 | if (values.containsKey(code)) continue; 168 | String name = getLabelFromSubject(code, lang); 169 | values.put(code, name); 170 | } 171 | } 172 | } 173 | 174 | // Build JSON response 175 | JsonBuilderFactory factory = Json.createBuilderFactory(null); 176 | JsonArrayBuilder array = factory.createArrayBuilder(); 177 | // Response 178 | for (Map.Entry entry : values.entrySet()) { 179 | if(entry.getValue() == null) continue; 180 | array = array.add(factory.createObjectBuilder() 181 | .add("id", property + "##" + entry.getKey()) 182 | .add("name", entry.getValue())); 183 | } 184 | out.print(array.build().toString()); 185 | } catch(Exception e) { 186 | e.printStackTrace(out); 187 | } 188 | } 189 | 190 | } 191 | -------------------------------------------------------------------------------- /src/cl/uchile/dcc/facet/web/SearchServlet.java: 
-------------------------------------------------------------------------------- 1 | package cl.uchile.dcc.facet.web; 2 | 3 | import cl.uchile.dcc.facet.core.DataFields; 4 | import cl.uchile.dcc.facet.core.InstancesFields; 5 | import cl.uchile.dcc.facet.core.ScoreBoostsOperator; 6 | import org.apache.lucene.analysis.Analyzer; 7 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 | import org.apache.lucene.document.Document; 9 | import org.apache.lucene.index.DirectoryReader; 10 | import org.apache.lucene.index.IndexReader; 11 | import org.apache.lucene.index.IndexableField; 12 | import org.apache.lucene.index.Term; 13 | import org.apache.lucene.queries.function.FunctionScoreQuery; 14 | import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; 15 | import org.apache.lucene.search.*; 16 | import org.apache.lucene.store.FSDirectory; 17 | 18 | import javax.servlet.ServletException; 19 | import javax.servlet.http.HttpServletRequest; 20 | import javax.servlet.http.HttpServletResponse; 21 | import java.io.IOException; 22 | import java.io.PrintWriter; 23 | 24 | import java.lang.management.ManagementFactory; 25 | import java.nio.file.Paths; 26 | import java.util.*; 27 | import java.util.stream.Collectors; 28 | 29 | public class SearchServlet extends DataServlet { 30 | 31 | private static final int MAX_DOCS = 50; 32 | private IndexSearcher instancesSearcher; 33 | private IndexReader instancesReader; 34 | 35 | @Override 36 | public void init() throws ServletException { 37 | super.init(); 38 | try { 39 | String instancesDir = getServletContext().getInitParameter("InstancesDirectory"); 40 | instancesReader = DirectoryReader.open(FSDirectory.open(Paths.get(instancesDir))); 41 | instancesSearcher = new IndexSearcher(instancesReader); 42 | } catch(IOException ioe) { 43 | System.err.println("FATAL: Cannot open Lucene folder"); 44 | throw new ServletException(); 45 | } 46 | } 47 | 48 | @Override 49 | public void doGet(HttpServletRequest request, 
HttpServletResponse response) throws IOException { 50 | response.setContentType("text/html; charset=UTF-8"); 51 | PrintWriter out = response.getWriter(); 52 | 53 | long startTime = System.currentTimeMillis(); 54 | String serviceId = out.toString().split("@")[1]; 55 | System.err.println("Service Id " + serviceId + " started"); 56 | long threadStartTime = ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId()); 57 | 58 | try { 59 | // Construct query from all specified filters 60 | List queries = new ArrayList<>(); 61 | String lang = request.getParameter("lang"); 62 | if(lang==null) lang = "en"; 63 | String keyword = request.getParameter("keyword"); 64 | String type = request.getParameter("instance"); 65 | String[] propertiesForm = request.getParameterValues("properties"); 66 | boolean hasProperties = false; 67 | if(propertiesForm != null) { 68 | for(String property : propertiesForm) { 69 | if(!property.trim().isEmpty()) { 70 | hasProperties = true; 71 | break; 72 | } 73 | } 74 | } 75 | if(hasProperties) 76 | Arrays.sort(propertiesForm); 77 | List selectedProperties = new ArrayList<>(); 78 | 79 | Analyzer analyzer = new StandardAnalyzer(); 80 | String labelFieldName = DataFields.LABEL.name() + "-" + lang; 81 | String altLabelFieldName = DataFields.ALT_LABEL.name() + "-" + lang; 82 | String descriptionFieldName = DataFields.DESCRIPTION.name() + "-" + lang; 83 | 84 | HashMap boostsMap = new HashMap<>(); 85 | boostsMap.put(altLabelFieldName, 2f); 86 | boostsMap.put(descriptionFieldName, 1f); 87 | boostsMap.put(labelFieldName, 5f); 88 | 89 | MultiFieldQueryParser queryParser = new MultiFieldQueryParser( 90 | new String[] {labelFieldName, descriptionFieldName, altLabelFieldName}, 91 | analyzer, boostsMap); 92 | 93 | if(keyword != null && !keyword.trim().isEmpty()) { 94 | keyword = keyword.trim(); 95 | queries.add(queryParser.parse(keyword)); 96 | } 97 | if(type != null && !type.trim().isEmpty()) { 98 | String instanceOfCode = 
getServletContext().getInitParameter("InstancesCode"); 99 | type = type.trim(); 100 | queries.add(new TermQuery(new Term(DataFields.TYPE.name(), type))); 101 | request.setAttribute("type", getLabelFromSubject(type, lang)); 102 | selectedProperties.add(instanceOfCode); 103 | } else 104 | type = null; 105 | 106 | if(hasProperties) { 107 | List checkedProperties = new ArrayList<>(); 108 | List labelProperties = new ArrayList<>(); 109 | for(String property : propertiesForm) { 110 | if(property.trim().isEmpty()) continue; 111 | checkedProperties.add(property); 112 | String[] split = property.split("##"); 113 | selectedProperties.add(split[0]); 114 | labelProperties.add(new CodeNameValue(getLabelFromSubject(split[0], lang), 115 | getLabelFromSubject(split[1], lang), 0)); 116 | queries.add(new TermQuery(new Term(DataFields.PO.name(), property))); 117 | } 118 | request.setAttribute("checked", checkedProperties); 119 | request.setAttribute("labels", labelProperties); 120 | } 121 | 122 | BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); 123 | for(Query q : queries) { 124 | queryBuilder.add(q, BooleanClause.Occur.MUST); 125 | } 126 | Query baseQuery = queryBuilder.build(); 127 | 128 | // Sort results by Lucene's score and PageRank score 129 | DoubleValuesSource boostsSource = DoubleValuesSource.fromDoubleField(DataFields.RANK.name()); 130 | boostsSource = DoubleValuesSource.function(boostsSource, new ScoreBoostsOperator()); 131 | boostsSource = DoubleValuesSource.scoringFunction(boostsSource, (src, score) -> src*score); 132 | Query query = new FunctionScoreQuery(baseQuery, boostsSource); 133 | 134 | List entries = new ArrayList<>(); 135 | Map propertiesMap = new HashMap<>(); 136 | 137 | boolean isCached = false; 138 | 139 | // Check if properties are cached 140 | if(type != null) { 141 | Query instancesQuery; 142 | boolean withProperties; 143 | if(hasProperties) { 144 | instancesQuery = new WildcardQuery(new Term(InstancesFields.ID.name(), type+"||*")); 145 | 
withProperties = true; 146 | } else { 147 | instancesQuery = new TermQuery(new Term(InstancesFields.ID.name(), type)); 148 | withProperties = false; 149 | } 150 | TopDocs instancesResults = instancesSearcher.search(instancesQuery, instancesReader.numDocs()); 151 | if(instancesResults.totalHits > 0) { 152 | Document instanceDoc = null; 153 | ScoreDoc[] hits = instancesResults.scoreDocs; 154 | if(!withProperties) { 155 | instanceDoc = instancesSearcher.doc(hits[0].doc); 156 | } else { 157 | System.err.println(Arrays.toString(propertiesForm)); 158 | for (ScoreDoc hit : hits) { 159 | Document currentDoc = instancesSearcher.doc(hit.doc); 160 | String docsCode = currentDoc.get(InstancesFields.ID.name()); 161 | String[] docProperties = docsCode.split("\\|\\|"); 162 | docProperties = Arrays.copyOfRange(docProperties, 1, docProperties.length); 163 | Arrays.sort(docProperties); 164 | if (Arrays.equals(propertiesForm, docProperties)) { 165 | instanceDoc = currentDoc; 166 | break; 167 | } 168 | } 169 | } 170 | if(instanceDoc != null) { 171 | // Cache found! 
172 | IndexableField[] properties = instanceDoc.getFields(InstancesFields.PROPERTY.name()); 173 | if(properties.length == 0) { 174 | isCached = false; 175 | } else { 176 | System.err.println("DEBUG: Cache found for " + instanceDoc.get(InstancesFields.ID.name())); 177 | isCached = true; 178 | } 179 | for (IndexableField property : properties) { 180 | String value = property.stringValue(); 181 | String[] split = value.split("##"); 182 | String pCode = split[0]; 183 | if (selectedProperties.contains(pCode)) continue; 184 | int frequency = Integer.parseInt(split[1]); 185 | propertiesMap.put(pCode, frequency); 186 | } 187 | } 188 | } 189 | } 190 | 191 | System.err.println("Service "+serviceId+" read the cache if present at " + (System.currentTimeMillis()-startTime)); 192 | // Getting the results 193 | TopDocs results; 194 | if(!isCached) { 195 | results = searcher.search(query, 50000); 196 | System.err.println("DEBUG: Not cache found..."); 197 | } else { 198 | results = searcher.search(query, MAX_DOCS); 199 | } 200 | ScoreDoc[] hits = results.scoreDocs; 201 | 202 | System.err.println("Service "+serviceId+" finish searching... 
" + (System.currentTimeMillis()-startTime)); 203 | int counter = 0; 204 | 205 | for (ScoreDoc hit : hits) { 206 | counter++; 207 | Document doc = searcher.doc(hit.doc); 208 | Set docsProperties = new HashSet<>(); 209 | // Store only top 50 results to display 210 | if(counter <= 50) { 211 | String subject = doc.get(DataFields.SUBJECT.name()); 212 | String label = doc.get(labelFieldName); 213 | if(label == null) label = subject; 214 | String description = doc.get(descriptionFieldName); 215 | if(description == null) description = ""; 216 | StringBuilder stringBuilder = new StringBuilder(); 217 | IndexableField[] altLabels = doc.getFields(altLabelFieldName); 218 | for(IndexableField altLabel : altLabels) { 219 | String text = altLabel.stringValue(); 220 | if(stringBuilder.length() > 0) stringBuilder.append(", "); 221 | stringBuilder.append(text); 222 | } 223 | String image = doc.get(DataFields.IMAGE.name()); 224 | String boost = Double.toString(hit.score); 225 | Entry entry = new Entry(subject, label, description, stringBuilder.toString(), boost, image); 226 | entries.add(entry); 227 | } 228 | // Compute properties if needed 229 | if(!isCached) { 230 | String subjectPrefix = getServletContext().getInitParameter("SubjectPrefix"); 231 | IndexableField[] pos = doc.getFields(DataFields.PO.name()); 232 | for (IndexableField po : pos) { 233 | String[] raw = po.stringValue().split("##"); 234 | if(!raw[1].startsWith(subjectPrefix)) continue; 235 | String key = raw[0]; 236 | if (selectedProperties.contains(key)) continue; 237 | if (docsProperties.contains(key)) continue; 238 | docsProperties.add(key); 239 | if (propertiesMap.containsKey(key)) { 240 | propertiesMap.replace(key, propertiesMap.get(key) + 1); 241 | } else { 242 | propertiesMap.put(key, 1); 243 | } 244 | } 245 | if(counter%100==0) 246 | System.err.println("Service "+serviceId+" is computing facets... 
" + (System.currentTimeMillis()-startTime)); 247 | } 248 | } 249 | 250 | //System.err.println("Service "+serviceId+" get all results and facets if not cache was found at " + (System.currentTimeMillis()-startTime)); 251 | // Get most frequent properties 252 | List> propertiesList = propertiesMap.entrySet().stream() 253 | .sorted((e1, e2) -> e2.getValue().compareTo(e1.getValue())) 254 | .collect(Collectors.toList()); 255 | 256 | //int length = propertiesList.size() < 20 ? propertiesList.size() : 20; 257 | //List> propertiesSub = propertiesList.subList(0, length); 258 | List properties = new ArrayList<>(); 259 | 260 | for(Map.Entry property : propertiesList) { 261 | String propName = getLabelFromSubject(property.getKey(), lang); 262 | properties.add(new CodeNameValue(property.getKey(), propName, property.getValue())); 263 | } 264 | // Send to JSP 265 | request.setAttribute("results", entries); 266 | request.setAttribute("properties", properties); 267 | request.setAttribute("total", results.totalHits); 268 | 269 | System.err.println("Service "+serviceId+" forwarding to JSP at " + (System.currentTimeMillis()-startTime)); 270 | long threadTime = ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId()) - threadStartTime; 271 | System.err.println("Service "+serviceId+" total active time " + threadTime); 272 | getServletConfig().getServletContext().getRequestDispatcher("/results.jsp").forward(request,response); 273 | } catch (Exception e) { 274 | out.println("Error while performing query!"); 275 | e.printStackTrace(out); 276 | } 277 | } 278 | 279 | @Override 280 | public void doPost(HttpServletRequest request, HttpServletResponse response) throws IOException { 281 | doGet(request, response); 282 | } 283 | 284 | } 285 | -------------------------------------------------------------------------------- /toolinfo.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "grafa-faceted-search", 3 | 
"title": "GraFa: Faceted Search", 4 | "description": "Faceted Search Engine and Browser for Wikdiata", 5 | "url": "http://grafa.dcc.uchile.cl/", 6 | "keywords": "search, browse, wikidata, facet", 7 | "author": "José Moreno-Vega, Aidan Hogan", 8 | "repository": "https://github.com/joseignm/GraFa.git" 9 | } -------------------------------------------------------------------------------- /web/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | IndexDirectory 9 | /home/jmoreno/indexes/data_v4/ 10 | 11 | 12 | 13 | InstancesDirectory 14 | /home/jmoreno/indexes/types_v4 15 | 16 | 17 | 18 | ValuesDirectory 19 | /home/jmoreno/indexes/values_v4 20 | 21 | 22 | 23 | LabelsDirectory 24 | /home/jmoreno/indexes/label_v4 25 | 26 | 27 | 28 | InstancesCode 29 | P31 30 | 31 | 32 | 33 | SubjectPrefix 34 | Q 35 | 36 | 37 | 38 | SearchRDF 39 | cl.uchile.dcc.facet.web.SearchServlet 40 | 41 | 42 | 43 | Instances 44 | cl.uchile.dcc.facet.web.InstancesServlet 45 | 46 | 47 | 48 | Properties 49 | cl.uchile.dcc.facet.web.PropertiesServlet 50 | 51 | 52 | 53 | API 54 | cl.uchile.dcc.facet.web.ApiServlet 55 | 56 | 57 | 58 | SearchRDF 59 | /search 60 | 61 | 62 | 63 | Instances 64 | /instances 65 | 66 | 67 | 68 | Properties 69 | /properties 70 | 71 | 72 | 73 | API 74 | /api 75 | 76 | 77 | -------------------------------------------------------------------------------- /web/about.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | GraFa - About 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 |
24 |
25 |
26 | 27 | 28 | 29 | 30 |

About

31 |
32 |
33 |

34 |
35 |
36 |

Data updated: September 13, 2017

37 | 38 |

GraFa is a faceted browser for Wikidata triples made by José Ignacio Moreno and Aidan Hogan.

39 | 40 |

Important: GraFa is currently in development. Certain features or options may change in the future. 41 | Data updates won't be very frequent. The next dataset update is expected around mid-March.

42 |
43 |

Contact info

44 |
45 | José Ignacio Moreno 46 |

47 | Aidan Hogan 48 |

49 | GitHub Repository 50 |

51 | GitHub Issue Tracker 52 |

53 |

FAQ

54 |
55 |

Data is incorrect or incomplete, what can I do?

56 |

GraFa uses all Wikidata triples, so you need to correct the data in Wikidata. 57 | Changes will be visible in the next update. The date of the last update is displayed on this page.

58 |
59 |

What are those numbers with a P or Q at the beginning?

60 |

Those are identifiers for a resource. If you see them, it is because that resource does not have a label in the selected 61 | language. You can add a label directly in Wikidata; the change will be visible in the next update.

62 |
63 |

Will GraFa be available in more languages?

64 |

Yes. GraFa will use the labels from Wikidata for the specified language. However, we will need help translating 65 | the GraFa interface; if you want to help translate GraFa, contact us.

66 |
67 |

I found a problem using GraFa

68 |

Please let us know; you can post on the 69 | GitHub Issue Tracker 70 |

71 |
72 |
73 |
74 | 75 | -------------------------------------------------------------------------------- /web/css/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joseignm/GraFa/7e5af3145366ec8db0b5370683393519c72ab551/web/css/favicon.ico -------------------------------------------------------------------------------- /web/css/logoBM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joseignm/GraFa/7e5af3145366ec8db0b5370683393519c72ab551/web/css/logoBM.png -------------------------------------------------------------------------------- /web/css/styles.css: -------------------------------------------------------------------------------- 1 | .image-margin { 2 | margin-right: 20px; 3 | } 4 | 5 | .image-entry { 6 | width: 150px; 7 | height: auto; 8 | } 9 | 10 | html { 11 | position: relative; 12 | min-height: 100%; 13 | } 14 | body { 15 | margin-bottom: 60px; /* Margin bottom by footer height */ 16 | } 17 | .footer { 18 | position: absolute; 19 | bottom: 0; 20 | width: 100%; 21 | height: 60px; /* Set the fixed height of the footer here */ 22 | line-height: 60px; /* Vertically center the text there */ 23 | background-color: #f5f5f5; 24 | } -------------------------------------------------------------------------------- /web/index.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 2 | 3 | 4 | 5 | 6 | GraFa 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 102 | 103 |
104 | 105 |
106 |
107 | 108 | <% String lang = request.getParameter("lang"); 109 | if(lang==null) lang = "en"; %> 110 |
111 | <% if(lang.equals("es")) { %> 112 |

Navegación por facetas

113 | <% } else { %> 114 |

Faceted Browsing

115 | <% } %> 116 |
117 |
118 | 127 |
128 |
129 | 130 |

131 | 132 |
133 |
134 | 150 | 151 |
152 |
153 | <% if(lang.equals("es")) { %> 154 |

Búsqueda por tipo

155 | <% } else { %> 156 |

Search by type

157 | <% } %> 158 |
159 | 173 |
174 |
175 | <% if(request.getParameter("lang")!=null && !request.getParameter("lang").isEmpty()) {%> 176 | "> 177 | <% } %> 178 | 179 | 180 | 181 |
182 | <% if(lang.equals("es")) { %> 183 |
197 | 198 | <% if(lang.equals("es")) { %> 199 | 200 | <% } else { %> 201 | 202 | <% } %> 203 |
204 | 205 |
206 | 207 |
208 | <% if(lang.equals("es")) { %> 209 |

Búsqueda por nombre

210 | <% } else { %> 211 |

Search by name

212 | <% } %> 213 |

214 |
215 | <% if(request.getParameter("lang")!=null && !request.getParameter("lang").isEmpty()) {%> 216 | "> 217 | <% } %> 218 | 219 |
220 | <% if(lang.equals("es")) { %> 221 | 222 | <% } else { %> 223 | 224 | <% } %> 225 | 226 |
227 | 228 | <% if(lang.equals("es")) { %> 229 | 230 | <% } else { %> 231 | 232 | <% } %> 233 |
234 | 235 |
236 | 237 |
238 | 239 |
240 |
241 |
242 | 243 |
244 |
245 |
246 | About 247 |
248 |
249 |
250 | 251 | 252 | -------------------------------------------------------------------------------- /web/query.jsp: -------------------------------------------------------------------------------- 1 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 2 | 3 | 4 | API Query 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 |
13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /web/results.jsp: -------------------------------------------------------------------------------- 1 | <%@ page import="java.util.List" %> 2 | <%@ page import="cl.uchile.dcc.facet.web.Entry" %> 3 | <%@ page import="cl.uchile.dcc.facet.web.CodeNameValue" %> 4 | <%@ page contentType="text/html;charset=UTF-8" language="java" %> 5 | 6 | 7 | 8 | 9 | 10 | <% String lang = request.getParameter("lang"); 11 | if(lang==null) lang = "en"; %> 12 | <% if(lang.equals("es")) { %> 13 | GraFa - Resultados 14 | <% } else { %> 15 | GraFa - Results 16 | <% } %> 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 106 | 107 | <% if(request.getAttribute("results")==null) { %> 108 |
109 | <% if(lang.equals("es")) { %> 110 | Error La consulta causó una excepción. 111 | <% } else { %> 112 | Error! Query triggered an exception. 113 | <% } %> 114 |
115 | <% } else { 116 | List entries = (List) request.getAttribute("results"); 117 | List properties = (List) request.getAttribute("properties"); 118 | List checkedProperties = (List) request.getAttribute("checked"); 119 | List labelsProperties = (List) request.getAttribute("labels"); 120 | %> 121 | 122 |
123 |
124 |
125 | 126 | 127 | 128 |
129 |
130 | <% if(lang.equals("es")) { %> 131 |

Resultados

132 | <% } else { %> 133 |

Results

134 | <% } %> 135 |
136 |
137 | 145 |
146 |
147 |
148 |
149 | <% if(lang.equals("es")) { %> 150 |

Buscando por:

151 | <% } else { %> 152 |

Current Query:

153 | <% } %> 154 | 155 |
156 | <% if(request.getParameter("keyword")!=null && !request.getParameter("keyword").isEmpty()) {%> 157 |
158 | <% if(lang.equals("es")) { %> 159 | 160 | <% } else { %> 161 | 162 | <% } %> 163 | "> 164 |
165 | <% } %> 166 | 167 | <% if(request.getParameter("lang")!=null && !request.getParameter("lang").isEmpty()) {%> 168 | "> 169 | <% } %> 170 | 171 | <% if(request.getAttribute("type")!=null) {%> 172 |
173 | <% if(lang.equals("es")) { %> 174 | 175 | <% } else { %> 176 | 177 | <% } %> 178 | "> 179 |
180 | 181 | <% } %> 182 | "> 183 | 184 | <% if(checkedProperties != null) { %> 185 | <% for(int i = 0; i < checkedProperties.size(); i++) { %> 186 |
187 | 188 | 189 |
190 | 191 | 192 | 195 | 196 |
197 |
198 | <% } %> 199 | <% } %> 200 | 201 |
202 | <% if(lang.equals("es")) { %> 203 |

Propiedades:

204 | <% } else { %> 205 |

Properties:

206 | <% } %> 207 | <% for(CodeNameValue property : properties) {%> 208 | 211 | <%= property.getName()+" ("+property.getValue()+" "%> 212 | <% if(lang.equals("es")) { %> 213 | <%="resultados)"%> 214 | <% } else { %> 215 | <%="results)"%> 216 | <% } %> 217 |
218 |
219 | 220 |
221 | <% } %> 222 |
223 |
224 |
225 | <% if(lang.equals("es")) { %> 226 |
Coincidencias totales: <%= request.getAttribute("total") %>
227 |
Mostrando primeros <%= entries.size() %> resultados
228 | <% } else { %> 229 |
Matching documents: <%= request.getAttribute("total") %>
230 |
Showing top <%= entries.size() %> results
231 | <% } %> 232 | <% for(Entry entry : entries) {%> 233 |
234 |
235 | <% if(entry.getImage() != null) {%> 236 | 237 | <% } %> 238 |

<%=entry.getLabel()%> <%=entry.getAltLabels()%>

239 |

<%=entry.getDescription()%>

240 |
241 |
242 | <% } %> 243 |
244 |
245 | <% } %> 246 | 247 | 248 | -------------------------------------------------------------------------------- /web/toolinfo.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "grafa", 3 | "title" : "GraFa", 4 | "description" : "Faceted Browsing over Wikidata triples.", 5 | "url" : "http://grafa.dcc.uchile.cl/", 6 | "keywords" : "facet, search, browser", 7 | "author" : "José Moreno, Aidan Hogan", 8 | "repository" : "https://github.com/joseignm/GraFa" 9 | } --------------------------------------------------------------------------------