├── .gitattributes ├── .gitignore ├── .metadata ├── .lock ├── .log ├── .mylyn │ ├── .taskListIndex │ │ ├── segments.gen │ │ └── segments_1 │ ├── .tasks.xml.zip │ ├── repositories.xml.zip │ └── tasks.xml.zip ├── .plugins │ ├── org.eclipse.core.resources │ │ ├── .history │ │ │ ├── 2 │ │ │ │ └── 700be8acc4df001417cff07ce66dc2f7 │ │ │ ├── 7 │ │ │ │ └── 00751c61bfdf001417cff07ce66dc2f7 │ │ │ ├── 9 │ │ │ │ └── e09c2da0c6df001417cff07ce66dc2f7 │ │ │ ├── 21 │ │ │ │ └── 304e57f9c4df001417cff07ce66dc2f7 │ │ │ ├── 29 │ │ │ │ ├── 80536693bcdf001417cff07ce66dc2f7 │ │ │ │ └── c0886fc4c6df001417cff07ce66dc2f7 │ │ │ ├── 38 │ │ │ │ └── 303c8b53c6df001417cff07ce66dc2f7 │ │ │ ├── 40 │ │ │ │ └── 50b0ababc3df001417cff07ce66dc2f7 │ │ │ ├── 57 │ │ │ │ └── 50ff10c2c6df001417cff07ce66dc2f7 │ │ │ ├── 61 │ │ │ │ └── a0b07720bfdf001417cff07ce66dc2f7 │ │ │ ├── 87 │ │ │ │ ├── 20c6713cc8df001417cff07ce66dc2f7 │ │ │ │ └── f0e54f06bddf001417cff07ce66dc2f7 │ │ │ ├── 89 │ │ │ │ ├── 40c6b5adc5df001417cff07ce66dc2f7 │ │ │ │ └── 80866619bddf001417cff07ce66dc2f7 │ │ │ ├── 91 │ │ │ │ └── 003ed708c6df001417cff07ce66dc2f7 │ │ │ ├── 93 │ │ │ │ ├── 2032f1d4bcdf001417cff07ce66dc2f7 │ │ │ │ └── 5021ea83c8df001417cff07ce66dc2f7 │ │ │ ├── 4d │ │ │ │ └── f0ba530abddf001417cff07ce66dc2f7 │ │ │ ├── 5b │ │ │ │ ├── 404be1f2c5df001417cff07ce66dc2f7 │ │ │ │ └── 4059508fc0df001417cff07ce66dc2f7 │ │ │ ├── 6a │ │ │ │ └── 60aed651c6df001417cff07ce66dc2f7 │ │ │ ├── 7c │ │ │ │ └── 905a39b4c0df001417cff07ce66dc2f7 │ │ │ ├── 7f │ │ │ │ └── 90011421c8df001417cff07ce66dc2f7 │ │ │ ├── 8d │ │ │ │ └── 00aa1494bedf001417cff07ce66dc2f7 │ │ │ ├── 9f │ │ │ │ └── a07a1becc6df001417cff07ce66dc2f7 │ │ │ ├── a0 │ │ │ │ └── 1097d93bc3df001417cff07ce66dc2f7 │ │ │ ├── a1 │ │ │ │ └── a0dc9672bddf001417cff07ce66dc2f7 │ │ │ ├── ac │ │ │ │ └── 30cdb0e6c7df001417cff07ce66dc2f7 │ │ │ ├── ae │ │ │ │ └── 406a738fc3df001417cff07ce66dc2f7 │ │ │ ├── b2 │ │ │ │ └── 90b74cf4c4df001417cff07ce66dc2f7 │ │ │ ├── b3 │ │ │ │ └── 5078c808c3df001417cff07ce66dc2f7 
│ │ │ ├── b4 │ │ │ │ ├── 50798199c8df001417cff07ce66dc2f7 │ │ │ │ └── 9041819dc6df001417cff07ce66dc2f7 │ │ │ ├── c4 │ │ │ │ └── 5066cc9abcdf001417cff07ce66dc2f7 │ │ │ ├── c9 │ │ │ │ └── a0d79ff9bedf001417cff07ce66dc2f7 │ │ │ ├── d1 │ │ │ │ └── 700e30c3c7df001417cff07ce66dc2f7 │ │ │ ├── d4 │ │ │ │ └── 402a70bac2df001417cff07ce66dc2f7 │ │ │ ├── d7 │ │ │ │ └── 7031164ac4df001417cff07ce66dc2f7 │ │ │ ├── f2 │ │ │ │ └── a0391deac6df001417cff07ce66dc2f7 │ │ │ └── f5 │ │ │ │ └── f01ba4b2bddf001417cff07ce66dc2f7 │ │ ├── .projects │ │ │ └── lucene │ │ │ │ ├── .markers │ │ │ │ └── org.eclipse.jdt.core │ │ │ │ └── state.dat │ │ ├── .root │ │ │ ├── .indexes │ │ │ │ ├── history.version │ │ │ │ ├── properties.index │ │ │ │ └── properties.version │ │ │ └── 4.tree │ │ └── .safetable │ │ │ └── org.eclipse.core.resources │ ├── org.eclipse.core.runtime │ │ └── .settings │ │ │ ├── org.eclipse.core.resources.prefs │ │ │ ├── org.eclipse.debug.ui.prefs │ │ │ ├── org.eclipse.jdt.core.prefs │ │ │ ├── org.eclipse.jdt.launching.prefs │ │ │ ├── org.eclipse.jdt.ui.prefs │ │ │ ├── org.eclipse.m2e.discovery.prefs │ │ │ ├── org.eclipse.mylyn.context.core.prefs │ │ │ ├── org.eclipse.mylyn.java.ui.prefs │ │ │ ├── org.eclipse.mylyn.monitor.ui.prefs │ │ │ ├── org.eclipse.mylyn.tasks.ui.prefs │ │ │ ├── org.eclipse.team.cvs.ui.prefs │ │ │ ├── org.eclipse.team.ui.prefs │ │ │ ├── org.eclipse.ui.editors.prefs │ │ │ ├── org.eclipse.ui.ide.prefs │ │ │ ├── org.eclipse.ui.prefs │ │ │ └── org.eclipse.ui.workbench.prefs │ ├── org.eclipse.debug.core │ │ └── .launches │ │ │ ├── IndexFiles.launch │ │ │ └── SearchFiles.launch │ ├── org.eclipse.debug.ui │ │ ├── dialog_settings.xml │ │ └── launchConfigurationHistory.xml │ ├── org.eclipse.e4.workbench │ │ └── workbench.xmi │ ├── org.eclipse.jdt.core │ │ ├── 105210318.index │ │ ├── 1198961354.index │ │ ├── 136426494.index │ │ ├── 1667022092.index │ │ ├── 174553.index │ │ ├── 1811389807.index │ │ ├── 1926144101.index │ │ ├── 2248287622.index │ │ ├── 2452934326.index │ │ 
├── 251782801.index │ │ ├── 2520022779.index │ │ ├── 2620426498.index │ │ ├── 2634579158.index │ │ ├── 265732991.index │ │ ├── 2804169155.index │ │ ├── 2959774643.index │ │ ├── 3018697931.index │ │ ├── 3038630371.index │ │ ├── 3158849313.index │ │ ├── 3491804422.index │ │ ├── 3731849190.index │ │ ├── 4039342660.index │ │ ├── 4134526084.index │ │ ├── 4677660.index │ │ ├── assumedExternalFilesCache │ │ ├── externalFilesCache │ │ ├── externalLibsTimeStamps │ │ ├── invalidArchivesCache │ │ ├── javaLikeNames.txt │ │ ├── nonChainingJarsCache │ │ ├── savedIndexNames.txt │ │ └── variablesAndContainers.dat │ ├── org.eclipse.jdt.launching │ │ ├── .install.xml │ │ └── libraryInfos.xml │ ├── org.eclipse.jdt.ui │ │ ├── OpenTypeHistory.xml │ │ ├── QualifiedTypeNameHistory.xml │ │ └── dialog_settings.xml │ ├── org.eclipse.ltk.core.refactoring │ │ └── .refactorings │ │ │ └── lucene │ │ │ └── 2015 │ │ │ └── 4 │ │ │ └── 15 │ │ │ ├── refactorings.history │ │ │ └── refactorings.index │ ├── org.eclipse.m2e.logback.configuration │ │ ├── 0.log │ │ └── logback.1.5.0.20140606-0033.xml │ └── org.eclipse.ui.workbench │ │ ├── dialog_settings.xml │ │ └── workingsets.xml └── version.ini ├── .recommenders ├── caches │ ├── identified-project-coordinates.json │ └── manual-mappings.json ├── index │ └── http___download_eclipse_org_recommenders_models_luna_ │ │ ├── _0.fdt │ │ ├── _0.fdx │ │ ├── _0.fnm │ │ ├── _0.frq │ │ ├── _0.nrm │ │ ├── _0.prx │ │ ├── _0.tii │ │ ├── _0.tis │ │ ├── segments.gen │ │ └── segments_1 └── repository │ └── http___download_eclipse_org_recommenders_models_luna_ │ └── org │ └── eclipse │ └── recommenders │ └── index │ └── 0.0.0-SNAPSHOT │ ├── _remote.repositories │ ├── index-0.0.0-20140605.014212-1.zip │ ├── maven-metadata-models.xml │ └── resolver-status.properties ├── Apache Lucene Part 1.pptx ├── Apache Lucene Part 2.pptx ├── Apache Lucene Part 3.pptx ├── Apache Lucene Part 4.pptx ├── README.md ├── index ├── _e.cfe ├── _e.cfs ├── _e.si ├── segments_f └── write.lock ├── 
lucene ├── .classpath ├── .project ├── .settings │ └── org.eclipse.jdt.core.prefs ├── bin │ └── lucene │ │ ├── IndexFiles$1.class │ │ ├── IndexFiles.class │ │ └── SearchFiles.class └── src │ └── lucene │ ├── IndexFiles.java │ └── SearchFiles.java └── test data ├── 092793.txt ├── 17arr.txt ├── 26008586.txt ├── 2600dcr1.txt ├── 2600dcr2.txt ├── 2600dcr3.txt ├── 2600dcrp.txt ├── 2600raid.txt ├── akronbbs.txt ├── arrest.txt ├── article.txt ├── bardbyte.txt ├── bawdybds.txt ├── bbscase.txt ├── bbscourt.txt ├── bbssting.txt ├── bmbmnul.txt ├── boardwat.txt ├── boh-20f8.txt ├── bp.txt ├── bucks.txt ├── busweek.txt ├── captmidn.txt ├── cptcrnch.txt ├── crime1.txt ├── crimes.txt ├── crunchy.txt ├── crypto.txt ├── csuicide.txt ├── cybrnazi.txt ├── dedsheep.txt ├── delorean.txt ├── dibbel.txt ├── digital.txt ├── digundrg.txt ├── donahue.txt ├── emmanuel.txt ├── fadehack.txt ├── freeware.txt ├── gangplan.txt ├── genius.txt ├── geraldo.txt ├── guilty.txt ├── hack-nsa.txt ├── hacker2.txt ├── hackers.txt ├── hackrfoe.txt ├── hackunit.txt ├── hcc.txt ├── hydebomb.txt ├── invest.txt ├── isdn.txt ├── jilted.txt ├── johnmedi.txt ├── kfyi-593.txt ├── latimes.txt ├── leftist.txt ├── legion.txt ├── livermor.txt ├── marsface.txt ├── marsmani.txt ├── menace.txt ├── mism6.txt ├── mitnick.txt ├── modbust.txt ├── multimed.txt ├── neidorfd.txt ├── night.txt ├── nighthak.txt ├── nsw-porn.txt ├── nzidcard.txt ├── pbust.txt ├── pcomhck.txt ├── phrack.txt ├── prank.txt ├── press.txt ├── pump.txt ├── r&e_bust.txt ├── races.txt ├── radhoax.txt ├── requiem.txt ├── saddam.txt ├── satlink.txt ├── shuttle.txt ├── sieze.txt ├── silvrspy.txt ├── ss&fbi.txt ├── sunhack.txt ├── supehack.txt ├── swpiracy.txt ├── syrp1124.txt ├── sysopjl.txt ├── teensac.txt ├── testfile.txt ├── testfile2.txt ├── testfile3.txt ├── theburli.txt ├── thegreat.txt ├── trib1118.txt ├── tubeshok.txt ├── v-world.txt ├── vanish.txt ├── videocon.txt ├── vs010799.txt └── washlod.txt /.gitattributes: 
-------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear on external disk 35 | .Spotlight-V100 36 | .Trashes 37 | 38 | # Directories potentially created on remote AFP share 39 | .AppleDB 40 | .AppleDesktop 41 | Network Trash Folder 42 | Temporary Items 43 | .apdisk 44 | -------------------------------------------------------------------------------- /.metadata/.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.lock -------------------------------------------------------------------------------- /.metadata/.mylyn/.taskListIndex/segments.gen: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.mylyn/.taskListIndex/segments.gen -------------------------------------------------------------------------------- /.metadata/.mylyn/.taskListIndex/segments_1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.mylyn/.taskListIndex/segments_1 -------------------------------------------------------------------------------- /.metadata/.mylyn/.tasks.xml.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.mylyn/.tasks.xml.zip -------------------------------------------------------------------------------- /.metadata/.mylyn/repositories.xml.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.mylyn/repositories.xml.zip -------------------------------------------------------------------------------- /.metadata/.mylyn/tasks.xml.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.mylyn/tasks.xml.zip -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/2/700be8acc4df001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. 
See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | // String queryString = "computer"; 61 | 62 | //wildcard query 63 | // String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | // String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | // String queryString = "computer^5 crime"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | Document doc = searcher.doc(hits[i].doc); 119 | String path = doc.get("path"); 120 | if (path != null) { 121 | System.out.println((i+1) + ". " + path); 122 | String title = doc.get("title"); 123 | if (title != null) { 124 | System.out.println(" Title: " + doc.get("title")); 125 | } 126 | } else { 127 | System.out.println((i+1) + ". " + "No path for this document"); 128 | } 129 | 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/29/c0886fc4c6df001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | // String queryString = "computer"; 61 | 62 | //wildcard query 63 | // String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | // String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | // String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | String queryString = "computer crime^10"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | Document doc = searcher.doc(hits[i].doc); 119 | String path = doc.get("path"); 120 | if (path != null) { 121 | System.out.println((i+1) + ". " + path); 122 | String title = doc.get("title"); 123 | if (title != null) { 124 | System.out.println(" Title: " + doc.get("title")); 125 | } 126 | } else { 127 | System.out.println((i+1) + ". " + "No path for this document"); 128 | } 129 | 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/57/50ff10c2c6df001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | // String queryString = "computer"; 61 | 62 | //wildcard query 63 | // String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | // String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | // String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | String queryString = "computer^5 crime"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | Document doc = searcher.doc(hits[i].doc); 119 | String path = doc.get("path"); 120 | if (path != null) { 121 | System.out.println((i+1) + ". " + path); 122 | String title = doc.get("title"); 123 | if (title != null) { 124 | System.out.println(" Title: " + doc.get("title")); 125 | } 126 | } else { 127 | System.out.println((i+1) + ". " + "No path for this document"); 128 | } 129 | 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/5b/404be1f2c5df001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | // String queryString = "computer"; 61 | 62 | //wildcard query 63 | String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | // String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | // String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | // String queryString = "computer^5 crime"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | Document doc = searcher.doc(hits[i].doc); 119 | String path = doc.get("path"); 120 | if (path != null) { 121 | System.out.println((i+1) + ". " + path); 122 | String title = doc.get("title"); 123 | if (title != null) { 124 | System.out.println(" Title: " + doc.get("title")); 125 | } 126 | } else { 127 | System.out.println((i+1) + ". " + "No path for this document"); 128 | } 129 | 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/5b/4059508fc0df001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | String queryString = "computer"; 61 | 62 | //wildcard query 63 | // String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | // String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | // String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | // String queryString = "computer^5 crime"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | if (raw) { // output raw format 119 | System.out.println("doc="+hits[i].doc+" score="+hits[i].score); 120 | continue; 121 | } 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/6a/60aed651c6df001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | // String queryString = "computer"; 61 | 62 | //wildcard query 63 | // String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | // String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | // String queryString = "computer^5 crime"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | Document doc = searcher.doc(hits[i].doc); 119 | String path = doc.get("path"); 120 | if (path != null) { 121 | System.out.println((i+1) + ". " + path); 122 | String title = doc.get("title"); 123 | if (title != null) { 124 | System.out.println(" Title: " + doc.get("title")); 125 | } 126 | } else { 127 | System.out.println((i+1) + ". " + "No path for this document"); 128 | } 129 | 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/89/40c6b5adc5df001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | String queryString = "computer"; 61 | 62 | //wildcard query 63 | // String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | // String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | // String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | // String queryString = "computer^5 crime"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | Document doc = searcher.doc(hits[i].doc); 119 | String path = doc.get("path"); 120 | if (path != null) { 121 | System.out.println((i+1) + ". " + path); 122 | String title = doc.get("title"); 123 | if (title != null) { 124 | System.out.println(" Title: " + doc.get("title")); 125 | } 126 | } else { 127 | System.out.println((i+1) + ". " + "No path for this document"); 128 | } 129 | 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/9/e09c2da0c6df001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. 
You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | // String queryString = "computer"; 61 | 62 | //wildcard query 63 | // String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | // String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | // String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | // String queryString = "computer^5 crime"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | Document doc = searcher.doc(hits[i].doc); 119 | String path = doc.get("path"); 120 | if (path != null) { 121 | System.out.println((i+1) + ". " + path); 122 | String title = doc.get("title"); 123 | if (title != null) { 124 | System.out.println(" Title: " + doc.get("title")); 125 | } 126 | } else { 127 | System.out.println((i+1) + ". 
" + "No path for this document"); 128 | } 129 | 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.history/a1/a0dc9672bddf001417cff07ce66dc2f7: -------------------------------------------------------------------------------- 1 | import java.io.File; 2 | import java.io.FileInputStream; 3 | import java.io.IOException; 4 | import org.apache.pdfbox.cos.COSDocument; 5 | import org.apache.pdfbox.pdfparser.PDFParser; 6 | import org.apache.pdfbox.pdmodel.PDDocument; 7 | import org.apache.pdfbox.util.PDFTextStripper; 8 | 9 | public class PDFTextParser { 10 | 11 | // Extract text from PDF Document 12 | static String pdftoText(String fileName) { 13 | PDFParser parser; 14 | String parsedText = null;; 15 | PDFTextStripper pdfStripper = null; 16 | PDDocument pdDoc = null; 17 | COSDocument cosDoc = null; 18 | File file = new File(fileName); 19 | if (!file.isFile()) { 20 | System.err.println("File " + fileName + " does not exist."); 21 | return null; 22 | } 23 | try { 24 | parser = new PDFParser(new FileInputStream(file)); 25 | } catch (IOException e) { 26 | System.err.println("Unable to open PDF Parser. " + e.getMessage()); 27 | return null; 28 | } 29 | try { 30 | parser.parse(); 31 | cosDoc = parser.getDocument(); 32 | pdfStripper = new PDFTextStripper(); 33 | pdDoc = new PDDocument(cosDoc); 34 | pdfStripper.setStartPage(1); 35 | pdfStripper.setEndPage(5); 36 | parsedText = pdfStripper.getText(pdDoc); 37 | } catch (Exception e) { 38 | System.err 39 | .println("An exception occured in parsing the PDF Document." 
40 | + e.getMessage()); 41 | } finally { 42 | try { 43 | if (cosDoc != null) 44 | cosDoc.close(); 45 | if (pdDoc != null) 46 | pdDoc.close(); 47 | } catch (Exception e) { 48 | e.printStackTrace(); 49 | } 50 | } 51 | return parsedText; 52 | } 53 | public static void main(String args[]){ 54 | System.out.println(pdftoText("F:/mini project/Kurose_Networks_6th_Edition.pdf")); 55 | } 56 | 57 | } -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.projects/lucene/.markers: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.core.resources/.projects/lucene/.markers -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.projects/lucene/org.eclipse.jdt.core/state.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.core.resources/.projects/lucene/org.eclipse.jdt.core/state.dat -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.root/.indexes/history.version: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.root/.indexes/properties.index: -------------------------------------------------------------------------------- 1 | /org.eclipse.core.resourcescontentCacheState2contentCacheTimestamp 1408509415297org.eclipse.jdt.corestateVersionNumber27 -------------------------------------------------------------------------------- 
/.metadata/.plugins/org.eclipse.core.resources/.root/.indexes/properties.version: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.root/4.tree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.core.resources/.root/4.tree -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.resources/.safetable/org.eclipse.core.resources: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.core.resources/.safetable/org.eclipse.core.resources -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | version=1 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.debug.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.debug.ui.PREF_LAUNCH_PERSPECTIVES=\r\n\r\n 3 | preferredTargets=default\:default| 4 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | 
org.eclipse.jdt.core.codeComplete.visibilityCheck=enabled 3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 5 | org.eclipse.jdt.core.compiler.compliance=1.8 6 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 7 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 8 | org.eclipse.jdt.core.compiler.source=1.8 9 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.jdt.launching.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.launching.PREF_VM_XML=\r\n\r\n\r\n\r\n\r\n\r\n 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.jdt.ui.prefs: -------------------------------------------------------------------------------- 1 | content_assist_disabled_computers=org.eclipse.jdt.ui.textProposalCategory\u0000org.eclipse.jdt.ui.javaTypeProposalCategory\u0000org.eclipse.jdt.ui.javaNoTypeProposalCategory\u0000org.eclipse.recommenders.calls.rcp.proposalCategory.templates\u0000org.eclipse.recommenders.chain.rcp.proposalCategory.chain\u0000org.eclipse.jdt.ui.javaAllProposalCategory\u0000 2 | content_assist_lru_history= 3 | content_assist_number_of_computers=19 4 | content_assist_proposals_background=255,255,255 5 | content_assist_proposals_foreground=0,0,0 6 | eclipse.preferences.version=1 7 | fontPropagated=true 8 | org.eclipse.jdt.ui.editor.tab.width= 9 | org.eclipse.jdt.ui.formatterprofiles.version=12 10 | org.eclipse.jdt.ui.javadoclocations.migrated=true 11 | org.eclipse.jdt.ui.text.code_templates_migrated=true 12 | org.eclipse.jdt.ui.text.custom_code_templates= 13 | org.eclipse.jdt.ui.text.custom_templates= 14 | org.eclipse.jdt.ui.text.templates_migrated=true 15 | 
org.eclipse.jface.textfont=1|Courier New|10.0|0|WINDOWS|1|0|0|0|0|0|0|0|0|1|0|0|0|0|Courier New; 16 | proposalOrderMigrated=true 17 | spelling_locale_initialized=true 18 | tabWidthPropagated=true 19 | useAnnotationsPrefPage=true 20 | useQuickDiffPrefPage=true 21 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.m2e.discovery.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.m2e.discovery.pref.projects= 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.mylyn.context.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | mylyn.attention.migrated=true 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.mylyn.java.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.mylyn.java.ui.run.count.3_10_0=1 3 | org.eclipse.mylyn.java.ui.run.count.3_1_0=1 4 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.mylyn.monitor.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.mylyn.monitor.activity.tracking.enabled.checked=true 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.mylyn.tasks.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | 
migrated.task.repositories.secure.store=true 3 | org.eclipse.mylyn.tasks.ui.filters.nonmatching=true 4 | org.eclipse.mylyn.tasks.ui.filters.nonmatching.encouraged=true 5 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.team.cvs.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | pref_first_startup=false 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.team.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.team.ui.first_time=false 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.ui.editors.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | overviewRuler_migration=migrated_3.1 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.ui.ide.prefs: -------------------------------------------------------------------------------- 1 | PROBLEMS_FILTERS_MIGRATE=true 2 | eclipse.preferences.version=1 3 | platformState=1408509415297 4 | quickStart=false 5 | tipsAndTricks=true 6 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | showIntro=false 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.core.runtime/.settings/org.eclipse.ui.workbench.prefs: 
-------------------------------------------------------------------------------- 1 | ENABLED_DECORATORS=org.eclipse.m2e.core.mavenVersionDecorator\:false,org.eclipse.egit.ui.internal.decorators.GitLightweightDecorator\:true,org.eclipse.jdt.ui.override.decorator\:true,org.eclipse.jdt.ui.interface.decorator\:false,org.eclipse.jdt.ui.buildpath.decorator\:true,org.eclipse.m2e.core.maven2decorator\:true,org.eclipse.mylyn.context.ui.decorator.interest\:true,org.eclipse.mylyn.tasks.ui.decorators.task\:true,org.eclipse.mylyn.team.ui.changeset.decorator\:true,org.eclipse.team.cvs.ui.decorator\:true,org.eclipse.ui.LinkedResourceDecorator\:true,org.eclipse.ui.SymlinkDecorator\:true,org.eclipse.ui.VirtualResourceDecorator\:true,org.eclipse.ui.ContentTypeDecorator\:true,org.eclipse.ui.ResourceFilterDecorator\:false, 2 | PLUGINS_NOT_ACTIVATED_ON_STARTUP=org.eclipse.m2e.discovery; 3 | eclipse.preferences.version=1 4 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.debug.core/.launches/IndexFiles.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.debug.core/.launches/SearchFiles.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.debug.ui/dialog_settings.xml: -------------------------------------------------------------------------------- 1 | 2 |
3 |
4 | 5 | 6 | 7 |
8 |
9 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.debug.ui/launchConfigurationHistory.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/105210318.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/105210318.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/1198961354.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/1198961354.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/136426494.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/136426494.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/1667022092.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/1667022092.index -------------------------------------------------------------------------------- 
/.metadata/.plugins/org.eclipse.jdt.core/174553.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/174553.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/1811389807.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/1811389807.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/1926144101.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/1926144101.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/2248287622.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/2248287622.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/2452934326.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/2452934326.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/251782801.index: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/251782801.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/2520022779.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/2520022779.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/2620426498.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/2620426498.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/2634579158.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/2634579158.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/265732991.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/265732991.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/2804169155.index: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/2804169155.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/2959774643.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/2959774643.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/3018697931.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/3018697931.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/3038630371.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/3038630371.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/3158849313.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/3158849313.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/3491804422.index: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/3491804422.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/3731849190.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/3731849190.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/4039342660.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/4039342660.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/4134526084.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/4134526084.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/4677660.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/4677660.index -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/assumedExternalFilesCache: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/.metadata/.plugins/org.eclipse.jdt.core/externalFilesCache: -------------------------------------------------------------------------------- 1 | 3C:/Program Files/Java/jre1.8.0_20/lib/ext/sunec.jar>C:/Program Files/Java/jre1.8.0_20/lib/ext/access-bridge-64.jar5C:/Program Files/Java/jre1.8.0_20/lib/ext/nashorn.jar4C:/Program Files/Java/jre1.8.0_20/lib/ext/dns_sd.jarUC:/Users/chirag.agrawal/Desktop/lucene-5.0.0/queryparser/lucene-queryparser-5.0.0.jar2C:/Program Files/Java/jre1.8.0_20/lib/charsets.jar7C:/Program Files/Java/jre1.8.0_20/lib/ext/sunmscapi.jar-C:/Program Files/Java/jre1.8.0_20/lib/jfr.jar3C:/Program Files/Java/jre1.8.0_20/lib/ext/dnsns.jar=C:/Program Files/Java/jre1.8.0_20/lib/ext/sunjce_provider.jar-C:/Program Files/Java/jre1.8.0_20/lib/jce.jar7C:/Program Files/Java/jre1.8.0_20/lib/ext/sunpkcs11.jar^C:/Users/chirag.agrawal/Desktop/lucene-5.0.0/analysis/common/lucene-analyzers-common-5.0.0.jar3C:/Program Files/Java/jre1.8.0_20/lib/resources.jarGC:/Users/chirag.agrawal/Desktop/lucene-5.0.0/core/lucene-core-5.0.0.jar6C:/Program Files/Java/jre1.8.0_20/lib/ext/cldrdata.jar.C:/Program Files/Java/jre1.8.0_20/lib/jsse.jar8C:/Program Files/Java/jre1.8.0_20/lib/ext/localedata.jar5C:/Program Files/Java/jre1.8.0_20/lib/ext/jaccess.jar3C:/Program Files/Java/jre1.8.0_20/lib/ext/jfxrt.jar,C:/Program Files/Java/jre1.8.0_20/lib/rt.jarGC:/Users/chirag.agrawal/Desktop/lucene-5.0.0/demo/lucene-demo-5.0.0.jar3C:/Program Files/Java/jre1.8.0_20/lib/ext/zipfs.jar -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/externalLibsTimeStamps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/externalLibsTimeStamps -------------------------------------------------------------------------------- 
/.metadata/.plugins/org.eclipse.jdt.core/invalidArchivesCache: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/javaLikeNames.txt: -------------------------------------------------------------------------------- 1 | java -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/nonChainingJarsCache: -------------------------------------------------------------------------------- 1 | ^C:/Users/chirag.agrawal/Desktop/lucene-5.0.0/analysis/common/lucene-analyzers-common-5.0.0.jarGC:/Users/chirag.agrawal/Desktop/lucene-5.0.0/core/lucene-core-5.0.0.jarUC:/Users/chirag.agrawal/Desktop/lucene-5.0.0/queryparser/lucene-queryparser-5.0.0.jarGC:/Users/chirag.agrawal/Desktop/lucene-5.0.0/demo/lucene-demo-5.0.0.jar -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/savedIndexNames.txt: -------------------------------------------------------------------------------- 1 | INDEX VERSION 1.127+C:\seminar\.metadata\.plugins\org.eclipse.jdt.core 2 | 4039342660.index 3 | 136426494.index 4 | 4134526084.index 5 | 4677660.index 6 | 2620426498.index 7 | 265732991.index 8 | 3158849313.index 9 | 3018697931.index 10 | 2804169155.index 11 | 2959774643.index 12 | 1198961354.index 13 | 2520022779.index 14 | 3491804422.index 15 | 2248287622.index 16 | 1667022092.index 17 | 251782801.index 18 | 105210318.index 19 | 174553.index 20 | 1811389807.index 21 | 3731849190.index 22 | 3038630371.index 23 | 2452934326.index 24 | 1926144101.index 25 | 2634579158.index 26 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.core/variablesAndContainers.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.jdt.core/variablesAndContainers.dat -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.launching/.install.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.launching/libraryInfos.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.ui/OpenTypeHistory.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.ui/QualifiedTypeNameHistory.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.jdt.ui/dialog_settings.xml: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 |
5 | 6 | 7 | 8 | 9 | 10 |
11 |
12 | 13 | 14 |
15 |
16 | 17 |
18 |
19 | 20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.ltk.core.refactoring/.refactorings/lucene/2015/4/15/refactorings.history: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.ltk.core.refactoring/.refactorings/lucene/2015/4/15/refactorings.index: -------------------------------------------------------------------------------- 1 | 1428696549258 Delete element 2 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.m2e.logback.configuration/0.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.metadata/.plugins/org.eclipse.m2e.logback.configuration/0.log -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.m2e.logback.configuration/logback.1.5.0.20140606-0033.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %date [%thread] %-5level %logger{35} - %msg%n 5 | 6 | 7 | OFF 8 | 9 | 10 | 11 | 12 | ${org.eclipse.m2e.log.dir}/0.log 13 | 14 | ${org.eclipse.m2e.log.dir}/%i.log 15 | 1 16 | 10 17 | 18 | 19 | 100MB 20 | 21 | 22 | %date [%thread] %-5level %logger{35} - %msg%n 23 | 24 | 25 | 26 | 27 | 28 | WARN 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.ui.workbench/dialog_settings.xml: -------------------------------------------------------------------------------- 1 | 2 |
3 |
4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 |
16 | -------------------------------------------------------------------------------- /.metadata/.plugins/org.eclipse.ui.workbench/workingsets.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.metadata/version.ini: -------------------------------------------------------------------------------- 1 | #Fri Apr 24 02:42:19 IST 2015 2 | org.eclipse.core.runtime=2 3 | org.eclipse.platform=4.4.0.v20140606-1215 4 | -------------------------------------------------------------------------------- /.recommenders/caches/identified-project-coordinates.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /.recommenders/caches/manual-mappings.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.fdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.fdx -------------------------------------------------------------------------------- /.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.fnm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.fnm -------------------------------------------------------------------------------- 
/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.frq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.frq -------------------------------------------------------------------------------- /.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.nrm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.nrm -------------------------------------------------------------------------------- /.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.tii: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.tii -------------------------------------------------------------------------------- /.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.tis: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/_0.tis -------------------------------------------------------------------------------- /.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/segments.gen: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/segments.gen -------------------------------------------------------------------------------- /.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/segments_1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/index/http___download_eclipse_org_recommenders_models_luna_/segments_1 -------------------------------------------------------------------------------- /.recommenders/repository/http___download_eclipse_org_recommenders_models_luna_/org/eclipse/recommenders/index/0.0.0-SNAPSHOT/_remote.repositories: -------------------------------------------------------------------------------- 1 | #NOTE: This is an Aether internal implementation file, its format can be changed without prior notice. 
2 | #Sun Apr 05 15:02:54 IST 2015 3 | index-0.0.0-20140605.014212-1.zip>models= 4 | -------------------------------------------------------------------------------- /.recommenders/repository/http___download_eclipse_org_recommenders_models_luna_/org/eclipse/recommenders/index/0.0.0-SNAPSHOT/index-0.0.0-20140605.014212-1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/.recommenders/repository/http___download_eclipse_org_recommenders_models_luna_/org/eclipse/recommenders/index/0.0.0-SNAPSHOT/index-0.0.0-20140605.014212-1.zip -------------------------------------------------------------------------------- /.recommenders/repository/http___download_eclipse_org_recommenders_models_luna_/org/eclipse/recommenders/index/0.0.0-SNAPSHOT/maven-metadata-models.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | org.eclipse.recommenders 4 | index 5 | 0.0.0-SNAPSHOT 6 | 7 | 8 | 20140605.014212 9 | 1 10 | 11 | 20140605014212 12 | 13 | 14 | zip 15 | 0.0.0-20140605.014212-1 16 | 20140605014212 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /.recommenders/repository/http___download_eclipse_org_recommenders_models_luna_/org/eclipse/recommenders/index/0.0.0-SNAPSHOT/resolver-status.properties: -------------------------------------------------------------------------------- 1 | #NOTE: This is an Aether internal implementation file, its format can be changed without prior notice. 
2 | #Sun Apr 05 15:02:07 IST 2015 3 | maven-metadata-models.xml.lastUpdated=1428226327763 4 | -------------------------------------------------------------------------------- /Apache Lucene Part 1.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/Apache Lucene Part 1.pptx -------------------------------------------------------------------------------- /Apache Lucene Part 2.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/Apache Lucene Part 2.pptx -------------------------------------------------------------------------------- /Apache Lucene Part 3.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/Apache Lucene Part 3.pptx -------------------------------------------------------------------------------- /Apache Lucene Part 4.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/Apache Lucene Part 4.pptx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache Lucene Video Tutorials 2 | ##### Link to watch videos - https://youtu.be/FixCCGjLWGg?list=PLPjq0_ZOyVPN5a6YBjkEhA8Vd5PXHdkQO 3 | 4 | 1. This repository contains the power point presentations showed in the video.
5 | a. Apache Lucene Part 1.pptx
6 | b. Apache Lucene Part 2.pptx
7 | c. Apache Lucene Part 3.pptx
8 | d. Apache Lucene Part 4.pptx

9 | These PPTs correspond to the four videos shown in the video lecture series.

10 | 2. It also contains the code that was shown in the videos.
11 | a. IndexFiles.java
12 | b. SearchFiles.java
13 | Link - https://github.com/chiragagrawal93/Lucene-Tutorials/tree/master/lucene/src/lucene

14 | 3. It also contains all the test data used for indexing.
15 | Link - https://github.com/chiragagrawal93/Lucene-Tutorials/tree/master/test%20data

16 | -------------------------------------------------------------------------------- /index/_e.cfe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/index/_e.cfe -------------------------------------------------------------------------------- /index/_e.cfs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/index/_e.cfs -------------------------------------------------------------------------------- /index/_e.si: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/index/_e.si -------------------------------------------------------------------------------- /index/segments_f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/index/segments_f -------------------------------------------------------------------------------- /index/write.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/index/write.lock -------------------------------------------------------------------------------- /lucene/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /lucene/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | lucene 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | 
org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /lucene/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 5 | org.eclipse.jdt.core.compiler.compliance=1.8 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 11 | org.eclipse.jdt.core.compiler.source=1.8 12 | -------------------------------------------------------------------------------- /lucene/bin/lucene/IndexFiles$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/lucene/bin/lucene/IndexFiles$1.class -------------------------------------------------------------------------------- /lucene/bin/lucene/IndexFiles.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/lucene/bin/lucene/IndexFiles.class -------------------------------------------------------------------------------- /lucene/bin/lucene/SearchFiles.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/lucene/bin/lucene/SearchFiles.class -------------------------------------------------------------------------------- 
/lucene/src/lucene/SearchFiles.java: -------------------------------------------------------------------------------- 1 | package lucene; 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one or more 5 | * contributor license agreements. See the NOTICE file distributed with 6 | * this work for additional information regarding copyright ownership. 7 | * The ASF licenses this file to You under the Apache License, Version 2.0 8 | * (the "License"); you may not use this file except in compliance with 9 | * the License. You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStreamReader; 23 | import java.nio.charset.StandardCharsets; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.Date; 27 | 28 | import org.apache.lucene.analysis.Analyzer; 29 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 30 | import org.apache.lucene.document.Document; 31 | import org.apache.lucene.index.DirectoryReader; 32 | import org.apache.lucene.index.IndexReader; 33 | import org.apache.lucene.queryparser.classic.QueryParser; 34 | import org.apache.lucene.search.IndexSearcher; 35 | import org.apache.lucene.search.Query; 36 | import org.apache.lucene.search.ScoreDoc; 37 | import org.apache.lucene.search.TopDocs; 38 | import org.apache.lucene.store.FSDirectory; 39 | 40 | /** Simple command-line based search demo. */ 41 | public class SearchFiles { 42 | 43 | private SearchFiles() {} 44 | 45 | /** Simple command-line based search demo. 
*/ 46 | public static void main(String[] args) throws Exception { 47 | String usage = 48 | "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; 49 | if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 50 | System.out.println(usage); 51 | System.exit(0); 52 | } 53 | 54 | String index = "C:\\seminar\\index"; 55 | String field = "contents"; 56 | String queries = null; 57 | int repeat = 0; 58 | boolean raw = false; 59 | // regular search 60 | // String queryString = "computer"; 61 | 62 | //wildcard query 63 | // String queryString = "te*t"; 64 | 65 | //fuzzy query 66 | // String queryString = "roam~2"; 67 | 68 | //phrase query 69 | // String queryString = "\"apache lucene\"~5"; 70 | 71 | //boolean search 72 | // String queryString = "\"networks\" AND \"protocol\""; 73 | 74 | 75 | //boosted search 76 | String queryString = "computer^10 crime"; 77 | 78 | int hitsPerPage = 100; 79 | IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); 80 | IndexSearcher searcher = new IndexSearcher(reader); 81 | Analyzer analyzer = new StandardAnalyzer(); 82 | 83 | BufferedReader in = null; 84 | QueryParser parser = new QueryParser(field, analyzer); 85 | 86 | Query query = parser.parse(queryString); 87 | 88 | System.out.println("Searching for: " + query.toString(field)); 89 | searcher.search(query, null, 100); 90 | doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); 91 | reader.close(); 92 | } 93 | 94 | /** 95 | * This demonstrates a typical paging search scenario, where the search engine presents 96 | * pages of size n to the user. The user can then go to the next page if interested in 97 | * the next hits. 98 | * 99 | * When the query is executed for the first time, then only enough results are collected 100 | * to fill 5 result pages. 
If the user wants to page beyond this limit, then the query 101 | * is executed another time and all hits are collected. 102 | * 103 | */ 104 | public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, 105 | int hitsPerPage, boolean raw, boolean interactive) throws IOException { 106 | 107 | // Collect enough docs to show 5 pages 108 | TopDocs results = searcher.search(query, 5 * hitsPerPage); 109 | ScoreDoc[] hits = results.scoreDocs; 110 | 111 | int numTotalHits = results.totalHits; 112 | System.out.println(numTotalHits + " total matching documents"); 113 | 114 | int start = 0; 115 | int end = Math.min(numTotalHits, hitsPerPage); 116 | 117 | for (int i = start; i < end; i++) { 118 | Document doc = searcher.doc(hits[i].doc); 119 | String path = doc.get("path"); 120 | if (path != null) { 121 | System.out.println((i+1) + ". " + path); 122 | String title = doc.get("title"); 123 | if (title != null) { 124 | System.out.println(" Title: " + doc.get("title")); 125 | } 126 | } else { 127 | System.out.println((i+1) + ". " + "No path for this document"); 128 | } 129 | 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /test data/092793.txt: -------------------------------------------------------------------------------- 1 | From The Dialy Oklahoman Newspaper, September 27, 1993, Page 1: 2 | 3 | COMPUTER PORN CASE TRIGGERS LEGAL QUESTIONS 4 | By David Zizzo, Staff Writer 5 | 6 | Is talking to Anthony Davis hazardous to your health? 7 | 8 | In a manner of speaking, that's what numerous people with computers and modems 9 | apparently have been worrying about since late July. That's when Oklahoma 10 | City police raided Davis' software publishing firm and confiscated his 11 | sophisticated commercial computer bulletin board system. Authorities allege 12 | Davis was selling pornographic computerized materials on CD-ROM and through 13 | files downloaded over phone lines. 
14 | 15 | Names of everyone who signed onto Davis' bulletin board service, those who 16 | downloaded or uploaded graphic files depicting sexual acts and those who 17 | didn't are in the hands of investigators. 18 | 19 | After the arrest, Earl Faubion, a police officer who runs a law enforcement 20 | oriented computer bulletin board system, got numerous inquiries from worried 21 | users. "There are a lot of people concerned," Faubion said. Many who used 22 | Davis' system for months and have been asking, "Am I in trouble?" Faubion, who 23 | ironically channeled much of his computer system's private mail through Davis' 24 | system before it was shut down, tells users that's out of his area of 25 | expertise. 26 | 27 | Bill Holmes, Davis' attorney, said bulletin board system operators fear their 28 | computers will be seized along with the electronic mail inside. 29 | 30 | The Davis bust sent a chill throughout the national computer community, said 31 | Jack Rickard, editor and publisher of Boardwatch magazine, a bulletin board 32 | newsletter published in Littleton, Colo. "It's causing chaos," he said. 33 | Rickard said Oklahoma City is being viewed "a little bit like clown city" in 34 | computer circles, since the explicit material Davis offered can be purchased 35 | in nearly every computer magazine and is carried by numerous bulletin boards. 36 | "This is off the shelf," he said. "It's considered pretty mundane stuff." 37 | 38 | Widespread availability is not a defense, however, attorney Holmes said. The 39 | allegedly illegal material was contained on four read-only memory compact 40 | discs and represented only a fraction of information offered by Davis. 41 | 42 | Oklahoma City police referred questions on the Davis case to the district 43 | attorney's office. An assistant prosecutor handling the case referred 44 | questions to District Attorney Bob Macy, who did not return several phone 45 | calls. 
46 | 47 | The bust will test Oklahoma laws on "community standards" regarding 48 | pornography, said Mike Godwin, attorney for the Electronic Frontier 49 | Foundation. The Washington, D.C., advocacy group is funded by donors that 50 | include large software companies. "When you talk about community standards, 51 | who's the real community?" Godwin wonders. "Is it the city or ... the 52 | community of people on-line?" 53 | 54 | Holmes, a former Cleveland County prosecutor, calls Oklahoma's pornography law 55 | "an extremely broad statute." "I'm not sure it wouldn't include Playboy or 56 | Penthouse type publications," he said. 57 | 58 | Legal experts say Oklahoma's law appears aimed against sale or distribution of 59 | pornographic material. That leaves some to wonder whether passing a free copy 60 | to a friend constitutes distribution. Part of the law also appears to make 61 | possession a crime, but U.S. Supreme Court rulings have backed an 62 | individual's right to own such material, Holmes said. Also, free speech 63 | guarantees likely would protect those who use words to describe pornographic 64 | acts, he said. Explicit materials depicting children are covered under much 65 | stricter laws, but Davis' CDs contained no such material. 66 | 67 | Apart from the pornography question is the issue of electronic mail seized 68 | with Davis' computer equipment, correspondence most legal experts say is 69 | protected by federal law. Davis' computer was part of a large electronic mail 70 | system that shuttled messages across the country. 71 | 72 | Critics of the bust say likely lawsuits over the mail might show the 73 | government "has bitten off more than it can chew." They point to a case in 74 | Austin where the owner of a computer won a $50,000 damage award over E-mail 75 | seized by the Secret Service. The government also was liable for $1,000 for 76 | each user of the E-mail. In Davis' case, that could be up to 2,000 clients, 77 | or $2 million. 
"The city of Oklahoma City could be on the hook for that," 78 | Rickard said. 79 | 80 | Critics also say police over reached in grabbing Davis' entire system, 81 | shutting down his pay-for-play computer service, because of four CDs. 82 | Prosecutors are seeking forfeiture of the system, which includes a 13 gigabyte 83 | memory unit and 10 high speed modems. "They don't have to seize it any more 84 | than they have to seize the building when they confiscate a bookstore," said 85 | Godwin of the Electronic Frontier Foundation. 86 | 87 |  -------------------------------------------------------------------------------- /test data/17arr.txt: -------------------------------------------------------------------------------- 1 | 2 | <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> 3 | <><><> 17 ARRESTED IN HIGH-TECH PHONE FRAUD <><><> 4 | <><><> by Lee Catterall and William Green <><><> 5 | <><><> Brought to you by Skatepunk <><><> 6 | <><><> CALL: THE METAL AE 201-879-6668 <><><> 7 | <><><> T.W.G.S.C. 209-526-3194 <><><> 8 | <><><> PIRATES HOLD 313-559-7199 <><><> 9 | <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> 10 | 11 | HONOLULU-- It was high-tech robbery -- the theft and sale of 12 | long-distance telephone service. 13 | 14 | Seventeen people in 13 states were arrested Monday. The charge: 15 | conspiracy to commit access device fraud and wire fraud. 16 | The victims: U.S. Sprint, MCI Corp., Allnet Corp.,ITT, Tri-Tel 17 | Communications. 18 | 19 | Their loss: a reported $125 million in the past year. 20 | Steve Ramsay of the Secret Service said those charged sold access codes 21 | for $80 to $150 a month. 22 | 23 | An access code -- a telephone credit card number-- allows callers to 24 | use long-distance phone lines. Calls were billed to that card number. 25 | The distributors often sold one code to hundreds, maybe thousands of 26 | people, Ramsey said. Charged to one number in four days: $73,592. 
27 | 28 | According to Assistant U.S. Attorney Robert Godbey: Computers were 29 | programmed to call the companies' long distance networks, then enter a 30 | creditcard number to pay for the call. 31 | 32 | If the number was rejected, the computer kept dialing until one was 33 | accepted. The successful number then was sold. 34 | 35 | Whoever was assinged the number, got the bill. 36 | "It was the miracle of technology, the computer took a simple chore and 37 | did it over and over and over," he said. 38 | 39 | The Secret Service caught them by setting up their own distributorship 40 | of illegal numbers -- a "sting operation" -- and "making controlled 41 | diliveries," Ramsey said. 42 | 43 | 44 | ------------------------------------------------------------------------------- 45 | This appeared in the April 27, 1988 copy of USA today and was typed in on this 46 | date by Skatepunk! If anyone has any Information not mentioned in this article 47 | please tell us all!! 48 | Skatepunk 49 | 50 | -------------------------------------------------------------------------------- /test data/2600dcr2.txt: -------------------------------------------------------------------------------- 1 | Subject: Secret Service Role Questioned in "2600 Washington Raid" 11/10/92 2 | From: newsbytes@clarinet.com 3 | Date: 10 Nov 92 21:03:23 GMT 4 | 5 | WASHINGTON, D.C., U.S.A., 1992 NOV 10 (NB) -- In the aftermath of an 6 | action on Friday, November 6th by members of the Pentagon City Mall 7 | Police and police from Arlington County, VA in which those attending a 8 | 2600 meeting at the mall were ordered from the premises, conflicting 9 | stories continue to appear. 10 | 11 | Attendees at the meeting have contended to Newsbytes that members of 12 | the mall police told them that they were "acting on behalf of the 13 | Secret Service." 
They also maintain that the mall police confiscated 14 | material from knapsacks and took film from someone attempting to 15 | photograph the action and a list of the names of security officers 16 | that one attendee was attempting to compile. 17 | 18 | Al Johnson, chief of security for the mall, denied these allegations 19 | to Newsbytes, saying, "No one said that we were acting on behalf of 20 | the Secret Service. We were merely enforcing our regulations. While 21 | the group was not disruptive, it had pulled tables together and was 22 | having a meeting in our food court area. The food court is for 23 | people eating and is not for meetings. We therefore asked the 24 | people to leave." 25 | 26 | Johnson denied that security personnel took away any film or lists 27 | and further said: "We did not confiscate any material. The group 28 | refused to own up to who owned material on the tables and in the 29 | vicinity so we collected it as lost material. If it turns out 30 | that anything did belong to any of those people, they are welcome 31 | to come in and, after making proper identification, take the 32 | material." 33 | 34 | In a conversation early on November 9th, Robert Rasor, Secret Service 35 | agent-in-charge of computer crime investigations, told Newsbytes that 36 | having mall security forces represent the Secret Service is not 37 | something that was done and, that to his knowledge, the Secret 38 | Service had no involvement with any Pentagon City mall actions 39 | on the previous Friday. 40 | 41 | A Newsbytes call to the Arlington County police was returned by a 42 | Detective Nuneville who said that her instructions were to refer all 43 | questions concerning the matter to agent David Adams of the Secret 44 | Service. She told Newsbytes that Adams would be providing all 45 | information concerning the involvement of both the Arlington Police and 46 | the Secret Service in the incident. 
47 | 48 | Adams told Newsbytes: "The mall police were not acting as agents 49 | for the Secret Service. Beyond that, I can not confirm or deny 50 | that there is an ongoing investigation." 51 | 52 | Adams also told Newsbytes that: "While I cannot speak for the 53 | Arlington police, I understand that their involvement was due to 54 | an incident unrelated to the investigation." 55 | 56 | Marc Rotenberg, director of the Washington office of Computer 57 | Professionals for Social Responsibility (CPSR), told Newsbytes 58 | that "CPSR has reason to believe that the detention of people at 59 | the Pentagon City Mall last Friday was undertaken at the behest 60 | of the Secret Service, which is a federal agency." 61 | 62 | "If that is the case, then there was an illegal search of people 63 | at the mall. There was no warrant and no indication of probable 64 | illegal activity. This raises constitutional issues. We have 65 | undertaken the filing of a Freedom of Information Act (FOIA) 66 | request to determine the scope, involvement and purpose of the 67 | Secret Service in this action," he said. 68 | 69 | 2600 meetings are held on the evening of the first Friday of each 70 | month in public places and malls in New York City, Washington, 71 | Philadelphia, Cambridge, St. Louis, Chicago, Los Angeles and San 72 | Francisco. They are promoted by "2600 Magazine: The Hacker Quarterly" 73 | and are attended by a variety of persons interested in 74 | telecommunications and so-called "hacker issues." 75 | 76 | The New York meeting, the oldest of its kind, is regularly attended 77 | by Eric Corley a/k/a Emmanuel Goldstein, editor and publisher of 2600, 78 | hackers, journalists, corporate communications professionals and other 79 | interested parties. It is known to have been the subject of 80 | surveillance at various times by law enforcement agencies conducting 81 | investigations into allegations of computer crime. 
82 | 83 | Corley told Newsbytes: "While I'm sure that meetings have been 84 | observed by law enforcement agencies, this is the only time that 85 | we have been harassed. It's definitely a freedom of speech 86 | issue." Corley also that he plans to be at the December meeting 87 | in Washington "to insure that it doesn't happen again." 88 | 89 | (Barbara E. McMullen & John F. McMullen/19921110) 90 | -------------------------------------------------------------------------------- /test data/2600dcr3.txt: -------------------------------------------------------------------------------- 1 | Subject: ****Conflicting Stories in 2600 Raid 11/11/92 2 | From: newsbytes@clarinet.com 3 | Date: 11 Nov 92 20:52:29 GMT 4 | 5 | WASHINGTON, D.C., U.S.A., 1992 NOV 11 (NB) -- In the on-going 6 | investigation of possible Secret Service involvement in the Friday, 7 | November 6th ejection of attendees at a "2600 meeting" from the premises 8 | of the Pentagon City Mall, opposing statements have come from the 9 | same source. 10 | 11 | Al Johnson, chief of security for the Pentagon City Mall, told 12 | Newsbytes on Monday, November 9th: "No one said that we were acting 13 | on behalf of the Secret Service. We were merely enforcing our 14 | regulations. While the group was not disruptive, it had pulled 15 | tables together and was having a meeting in our food court area. 16 | The food court is for people eating and is not for meetings. We 17 | therefore asked the people to leave." 18 | 19 | On the same day, Johnson was quoted was quoted in a Communications 20 | Daily article by Brock Meeks as saying, "As far as I'm concerned, 21 | we're out of this. The Secret Service, the FBI, they're the ones 22 | that ramrodded this whole thing." 23 | 24 | Newsbytes contacted Meeks to discuss the discrepancies in the stories and 25 | was informed that the conversation with Johnson had been taped and was 26 | available for review. 
This Newsbytes reporter listened to the tape (and 27 | reviewed a transcript). On the tape, Johnson was clearly heard to make the 28 | statement quoted by Meeks. 29 | 30 | He also said, "maybe you ought to call the Secret Service. They're 31 | handling this whole thing. We, we were just here," and, in response 32 | to a Meeks question about a Secret Service contact, "Ah.. you know, 33 | I don't have a contact person. These people were working on their 34 | own, undercover, we never got any names, but they definitely, we saw 35 | identification, they were here." 36 | 37 | Newsbytes contacted Johnson again on the morning of Wednesday, 38 | November 11 and asked him once again whether there was any Secret 39 | Service involvement in the action. Johnson said: "No, I told you that 40 | they were not involved." When it was mentioned that there was a 41 | story in Communications Daily, quoting him to the contrary, Johnson 42 | said, "I never told Meeks that. There was no Secret Service 43 | involvement." 44 | 45 | Informed of the possible existence of a tape quoting him to the contrary, 46 | Johnson said, "Meeks taped me? He can't do that. I'll show him that 47 | I'm not fooling around. I'll have him arrested." 48 | 49 | Johnson also said, "He asked me if the Secret Service was involved; I just 50 | told him that, if he thought they were, he should call them and ask them." 51 | 52 | Then Johnson again told Newsbytes that the incident was "just a mall 53 | problem. There were too many people congregating." 54 | 55 | In a related matter, Marc Rotenberg, director of the Washington office of 56 | Computer Professionals For Social Responsibility (CPSR), has announced 57 | that CPSR has filed a Freedom of Information Act (FOIA) request with the 58 | Secret Service asking for information concerning Secret Service 59 | involvement in the incident. 60 | 61 | Rotenberg told Newsbytes that the Secret Service has 10 days to respond 62 | to the request. 
He also said that CPSR "is exploring other legal 63 | options in this matter." 64 | 65 | The Secret Service, in earlier conversations with Newsbytes, has denied 66 | that the mall security was working on its behalf. 67 | 68 | In the actual incident, a group attending the informal meeting was 69 | disbanded and, according to attendees, had property confiscated. 70 | They also contend that security guards took film from someone 71 | photographing the confiscation as well as a list that someone was 72 | making of the guard's names. 73 | 74 | In his November 9th conversation with Newsbytes, Johnson denied that 75 | security personnel took away any film or lists and further said, "We 76 | did not confiscate any material. The group refused to own up to 77 | who owned material on the tables and in the vicinity so we collected 78 | it as lost material. If it turns out that anything did belong to any 79 | of those people, they are welcome to come in and, after making proper 80 | identification, take the material." 81 | 82 | 2600 meetings are promoted by "2600 Magazine: The Hacker Quarterly" and 83 | are held on the evening of the first Friday of each month in public 84 | places and malls in New York City, Washington, Philadelphia, Cambridge, 85 | St. Louis, Chicago, Los Angeles and San Francisco. They are regularly 86 | attended by a variety of persons interested in telecommunications and 87 | so-called "hacker issues." 88 | 89 | (Barbara E. McMullen & John F. 
McMullen/19921111) 90 | -------------------------------------------------------------------------------- /test data/akronbbs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/akronbbs.txt -------------------------------------------------------------------------------- /test data/arrest.txt: -------------------------------------------------------------------------------- 1 | 2 | TWO JUVENILES ARRESTED IN BBS EXTORTION CASE 3 | 4 | (April 26) 5 | Two 15-year-olds have been arrested by California authorities on charges 6 | they made death threats and tried to extort money from at least three computer 7 | bulletin board operators. 8 | As reported in Online Today yesterday, Encino, Calif., BBS sysop John Sands 9 | went to police after receiving demands for money and threats on his life in 10 | messages left on his board in March and the first part of this month. 11 | Police now say that the two suspects -- both reportedly sophomores at Seaside 12 | High School near Monterey, Calif. -- also are accused of making similar 13 | computerized threats on a Fort Ord-based Army staff sergeant and his teen-age 14 | son, and on another student at a private high school in the Pebble Beach, 15 | Calif., area. 16 | The juveniles were arrested at their homes in Marina by investigators who 17 | seized computer equipment allegedly used to transmit the threats, according to 18 | United Press International reporter Michael D. Harris. 19 | Authorities say the teen-agers, who allegedly demanded payments ranging from 20 | $50 to $350 from at least the three BBS sysops identified, were turned over to 21 | their parents while authorities decide how and where they should be prosecuted. 
22 | The case came to light yesterday after reports surfaced that legal complaints 23 | had been filed by Sands, who is the chief electronics engineer for Capitol 24 | Records. Sands, 43, said the visitors to his private BBS demanded $350 from him 25 | and threatened violence if he didn't pay. 26 | The computer message instructed Sands to leave the money at a drop site in 27 | San Jose. During the investigation, Los Angeles police prepared a phony drop 28 | but never carried it out because they susequently received tips that led them 29 | to the youths, said police Lt. Fred Reno. 30 | Following the arrests, Sands told Harris, "It was a little scary because even 31 | though I suspected they were juveniles, I felt they were probably capable of 32 | carrying out their threats. I%m sorry to see any young person get in trouble, 33 | but I'm relieved that they were arrested." 34 | Meanwhile, according to Judy Smagula Farah of the Associated Press, the 35 | youths and Sands once belonged to the same computer club and used a code that 36 | gave them access to Sands' bulletin board. 37 | Reno said that if the suspects decide to plead guilty to the likely charges 38 | of accessing a computer system to extort money, their sentencing will occur in 39 | Monterey County. If they decide to fight the charges, the case -- the first of 40 | its kind in Los Angeles County -- will be prosecuted in Southern California. 
41 |  -------------------------------------------------------------------------------- /test data/article.txt: -------------------------------------------------------------------------------- 1 | /------------------------------------------------------------\ 2 | / Ascii and you shall recieve \ 3 | / Analysis of an average Computer article \ 4 | =-=/ By The Slipped Disk \ =-= 5 | =-=\ / =-= 6 | \ Directed By The Slipped Disk Produced By The Slipped Disk / 7 | \ Pirates Chest.............................1-617-891-1349 / 8 | \------------------------------------------------------------/ 9 | 10 | I was leafing through the june issue of Computer Entertainment (It was 11 | Electronic Games until May) when I noticed an article about the nation's 12 | bulletin boards called "Ascii and you shall Recieve". The title was stupid, but 13 | I thought "What the hell? Can't hurt." It did. It was boring, and there was so 14 | much that I thought about saying to the Author, Roger Rapoport, that I was 15 | ready to explode. So I have done the next best thing. I am going to retype the 16 | entire article here. But with one small addition.... My personal comments about 17 | each sentence or group of sentences will follow it or them in squiggly quotes. 18 | (Also known as {}.) This is because he uses the regular Parenthesis and I don't 19 | want to screw it up. So, here it goes......... 20 | 21 | 22 | Ascii and you shall Recieve 23 | By Roger Rapoport {What a name.} 24 | ================================ 25 | 26 | If you've got a computer Modem, {And I know you do,} you probably know that 27 | you can do all sorts of exciting things: Send electronic mail (too bad if your 28 | granmother doesn't have a computer) {Very funny, asshole.}, get the latest 29 | stock market quotes {Everybody talks about that. Nobody does it.}, buy airline 30 | tickets, pay bills. Not very exciting, I know. Then how about tapping {Tapping? 31 | really now.} into the less corporate, "Underground" {Underground? 
Nice thing to 32 | say about a BBS.} computer bulletin boards that are proliferating around the 33 | country? Through the miracle {Say "Hard work", Roger.} of digital 34 | communication, that very same computer and modem can tell you who killed more 35 | indians than the Lone Ranger {Do you Care?}, why Diskotech and San Leandro High 36 | School in California is believed to be having an Affair with Mr. C-brain 37 | {Again, Who cares?}, and that a nanosecond is the amount of time it takes an 38 | English governess to realize the kids are up to something. {Huh?} 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |  -------------------------------------------------------------------------------- /test data/bbssting.txt: -------------------------------------------------------------------------------- 1 | -:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:- 2 | 3 | POLICE SET UP BULLETIN BOARD STINGS 4 | 5 | 6 | By Jim Forbes 7 | Infoworld Staff 8 | 9 | AUSTIN, TX - Law enforcement officials here have joined a growing number of 10 | police agencies nationwide running "sting" operations to catch persons using 11 | bulletin boards for illegal purposes. 12 | 13 | Based on information posted on a bulletin board it operated, the Austin 14 | Police Department said it has been able to turn off two pirate boards here and 15 | expects shortly to make a number of arrests for misdemeanor violations of 16 | Texas' newly enacted computer crime law. 17 | 18 | For more than two years, the department secretly ran a board called the 19 | Underground Tunnel, which was set up to appear as a bulletin board run by a 20 | system operator called Pluto. But late last month - to the surprise of the 21 | board's more than 1,000 users - Pluto was revealed as Sgt. Robert Ansley, a 22 | seven-year veteran of the police department. 
23 | 24 | "Most of the users were people interested primarily in several on-line 25 | fantasy games or in electronic messaging," Ansley said. "To get to the levels 26 | where people posted information on how to crash corporate systems, the user had 27 | to ask for increased access. We were very careful not to solicit or entrap 28 | anyone into leaving illegal information." 29 | 30 | The Austin police department disclosure caught most of the board's users by 31 | surprise. "I liked the board's electronic messaging capabilities," said user 32 | Michael Whalen, the managing editor of the Daily Texan, the student newspaper 33 | of the University of Texas here. "I was really surprised at how the officer 34 | was able to pull this off." 35 | 36 | What the police found, according to Ansley, included access codes belonging 37 | to the world's largest credit reporting organization, TRW Information Services 38 | Systems Division of Orange, California. "Most offenders seem to be real big on 39 | TRW," said Ansley. 40 | 41 | Sting and intelligence gathering bulletin board operations are on the rise 42 | throughout the country, according to law enforcement officials. Several police 43 | departments nationwide have already used bulletin boards to track down and 44 | arrest microcomputer users who post illegally obtained calling card codes, 45 | mainframe access procedures and passwords, or other confidential information. 46 | According to one high-lvel West Coast law enforcement officer who declined to 47 | be identified, federal officials are now joining local authorities in running 48 | bulletin boards in several key metropolitan areas. 49 | 50 | "You better believe law enforcement agencies are interested and, in some 51 | cases, running bulletin boards," said Dan Pasquale, a sergeant with the 52 | Fremont, California, police department. 
Last month, police in Fremont capped 53 | three and a half months of bulletin board operations by arresting eight 54 | individuals for alleged credit card fraud, misuse of telephone credit card 55 | operations, and technical trespass. Pasquale said most corporations whose 56 | passwords or calling card numbers were posted on Fremont's board were unaware 57 | that their information had been compromised. 58 | 59 | Although police are pleased with their results, some users say they feel the 60 | sting bulletin boards are unfair to both innocent users and suspected criminals 61 | alike. Whalen said students at the University of Texas used the board 62 | extensively, and he claimed that some people accused of posting access codes 63 | and other information on the board felt they had been entrapped when they 64 | discovered that the board was a police sting operation. 65 | 66 | Whalen also said that some users where concerned about the privacy and 67 | sanctity of electronic mail left on the board. "Ansley said users are foolish 68 | if they don't think a system operator reads the mail on the board," he added. 69 | 70 | Indeed, as police turn increasingly to bulletin boards to catch suspected 71 | criminals, the issue of entrapment has also become a growing concern, one to 72 | which police are sensitive. 73 | 74 | "At no time did the police department urge users to leave access codes, 75 | applications, or passwords for corporate computers on the Tunnel," Ansley said. 76 | 77 | To prove entrapment, a suspect would have to cleary show that a government 78 | agent offered some type of inducement to promote criminal activity, said Jim 79 | Harrington, the legal director of the Texas Civil Liverties Union here. "The 80 | whole are of police gaining information on [criminal activities] by reading 81 | electronic mail is very interesting." 82 | 83 | Fremont police held a series of meetings with a district attorney before 84 | they started the board, according to Pasquale. 
"We established a point where 85 | entrapment began and made sure we never crossed that point," he said. "In 86 | fact, messages on the board were scripted in conjunction with the district 87 | attorney's office." 88 | 89 | -:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:- 90 | 91 | Downloaded From P-80 Systems 304-744-2253 92 | 93 | -------------------------------------------------------------------------------- /test data/bmbmnul.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | *Student Disciplined for Bomb Manual* 5 | ----------------------------------- 6 | 7 | Collinsville Herald-Journal 8 | 3/21/93-Pg. 7-A 9 | 10 | Martin Richter 11 | 12 | 13 | Three Collinsville High School Students were disciplined 14 | after one was caught reading a bomb-makeing manual in class-a 15 | chilling reminder of just how easy information on bomb makeing 16 | is to come by. 17 | 18 | The proliferation of such information has been a topic of 19 | conversation in the wake of the bombing of the Trade Center 20 | in New York.In published reports,investigatorshave said they 21 | found bom-making manuals in apartments of suspects raided 22 | after the bombing. 23 | 24 | Collinsville Det. Bob Vecchetti who handled the case,said a 25 | 16-year-old student who recently transferred to CHS from out 26 | of state had gotten the information from a computer network and 27 | brought it here. 28 | 29 | "That's what the one teenager said.",Vachetti said."He likes 30 | computers,and he found the printout,and he printed out the 31 | literature." 32 | 33 | "The three just handed them from one to the other,just for 34 | reading material.The third kid brought it to school and was caught 35 | with it." 36 | 37 | Vechetti said it appeared that the three students had no intention of 38 | building a bomb,and had the information mainly out of curiosity. 
39 | 40 | "Basically we gave them a lecture on why they shouldn't read that type of 41 | material," he said. "That's the wrong kind of curiosity, in my opinion." 42 | 43 | He said the information was "a pretty good sized pamphlet" containing 44 | at least 20 different categories of bombs. 45 | 46 | Police Chief John Swindle said he has always been concerned about how 47 | easy it is to obtain information on bomb making. 48 | 49 | "From past experience, I know that type of thing is easily available in 50 | bookstores," Swindle said. "I don't think it's very difficult to get a book 51 | on it... I don't like to see books like `The Anarchist Cookbook' where they 52 | go into detail on how to hurt people." 53 | 54 | But he added that having such information is not against the law. 55 | 56 | "There's nothing we can do locally, with the First Amendment," he said. 57 | The World Trade Center blast has reminded Americans that they aren't immune to 58 | terrorism in the United States. While Collinsville obviously is not high 59 | on terrorism's list of targets, Swindle said it isn't inconceivable that 60 | there could someday be an incident in this area. 61 | 62 | "Our biggest danger, I would think, would be that we would accidentally 63 | come in contact with these people, with the interstate highways and the 64 | motels here - maybe stop them for something like that," Swindle said. 65 | 66 | Swindle recalled an incident a year or two ago in which a powerful 67 | pipe bomb was used to blow up some mailboxes here. And he remembered 68 | responding to a suicide at a local motel when he was still a detective, 69 | when a small box with a button on it was found next to the body. 70 | "Fortunately I didn't press the button," Swindle said. "It was rigged to 71 | a pipe bomb under the bed." 
72 | (END) 73 | 74 | -------------------------------------------------------------------------------- /test data/boardwat.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/boardwat.txt -------------------------------------------------------------------------------- /test data/boh-20f8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/boh-20f8.txt -------------------------------------------------------------------------------- /test data/bucks.txt: -------------------------------------------------------------------------------- 1 | COMPILED FROM NEWSWEEK DEC 23, 1985 2 | 3 | LOTS OF PEOPLE LOVE NEW YORK'S CEN- 4 | TRAL PARK, BUT FEW LOVE IT AS MUCH 5 | AS MILLIONAIRE GEORGE DELACORTE. 6 | OVER THE YEARS, THE 92-YEAR-OLD 7 | PHILANTHROPIST PAID FOR THE PARK'S 8 | OUTDOOR DELACORTE THEATRE, THE 9 | ANIMATED DELACORTE CLOCK AT THE 10 | PARK'S ZOO AND THE FAMOUS ALICE 11 | IN WONDERLAND STATUE NEAR THE BOAT 12 | POND. 13 | WALKING THROUGH THE PARK ONE MOR- 14 | NING LAST WEEK, DELACORTE AND HIS 15 | WIFE VALERIE, 66, WERE MUGGED BY 16 | KNIFE-WIELDING ATTACKERS WHO TOOK 17 | $200 AND MRS. DELACORTE'S $5,500 18 | MINK COAT -- BRUISING HIS FACE 19 | AND NICKING HER HAND IN THE PRO- 20 | CESS. DELACORTE SAYS THE ATTACK 21 | WON'T ALTER HIS 60-YEAR-OLD CUS- 22 | TOM OF WALKING DAILY THROUGH THE 23 | PARK -- BUT HE *WAS* OFFENDED BY 24 | HIS ASSAILANT'S LACK OF MANNERS. 25 | "AFTER THEY TOOK MY MONEY THEY 26 | COULD HAVE SAID, 'THANKS FOR 27 | BUILDING THAT STATUE'," HE SAID. 28 | "BUT THEY DIDN'T." 29 |  MANNERS. 
30 | "AF -------------------------------------------------------------------------------- /test data/crime1.txt: -------------------------------------------------------------------------------- 1 | 2 | MAY 1990 3 | COMPUTERWORLD 4 | COMPUTER CRIME BECOMING "LESS SOPHISTICATED" 5 | by Margie Wylie 6 | 7 | MENLO PARK, Calif. - Despite all 8 | the media attention surrounding his 9 | conviction, Robert Morris does not 10 | present the profile of a basic computer 11 | criminal, nor is his crime typical 12 | throughout the industry. 13 | 14 | On the contrary, less computer 15 | crime is being committed by those 16 | who can be characterised as "hackers", 17 | and the crimes committed are far 18 | less sophisticated, says a report 19 | issued by the National Centre for 20 | Computer Crime Data (NCCCD) in the 21 | United States. 22 | 23 | In fact, it is becoming increasingly 24 | difficult to identify the typical perpetrator 25 | as the bright, young white male, 26 | said Buck Bloombecker, director of the 27 | NCCCD. The report states that in 28 | California, 32% of those arrested for 29 | computer crimes were women, and 43% 30 | were minorities. 31 | 32 | The study also revealed the changing 33 | nature of computer crimes. A nationwide 34 | survey of 2500 prosecutors concluded that 34% 35 | of computer crimes in 1988 involved the 36 | theft of services. That figure, up from 37 | 10% in 1986, is indicative of the type 38 | of computer crime being committed today, 39 | Bloombecker said. 40 | 41 | "More and more computer crime is of the 42 | garden variety," Bloombecker said. 43 | "What we are facing is the 'democratisation' 44 | of computer crime." 45 | 46 | Computer crime has taken a heavy toll on 47 | US business. In 1988, the cost of 48 | computer crimes tallied to an astonishing 49 | $US555 million, 930 personnel hours, and 50 | 16.3 years of computer service. Further, 51 | in 12% of cases, the victims were individuals, 52 | not corporations or networks. 
53 | 54 | Still, Morris' conviction may play a 55 | role in preventing computer crime because 56 | it demonstrates that existing computer 57 | crime laws are sufficient, Bloombecker 58 | said. Passing more laws like the 59 | antivirus bill introduced by Republican 60 | Wally Herger, is merely an academic 61 | exercise, he said. "What we need are 62 | more prosecutions under exisisting laws. 63 | That takes time and money." 64 | 65 | Marc Rotenburg, director of Computer 66 | Professionals for Social Responsibility, 67 | suggested that Congress boost funding for 68 | computer security research rather than 69 | pass more laws. 70 | 71 | Morris was recently convicted of violating 72 | the federal 1986 Computer Fraud and Abuse 73 | Act for sending a virus program into a 74 | national email system. 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /test data/cybrnazi.txt: -------------------------------------------------------------------------------- 1 | Via NY Transfer News Collective * All the News that Doesn't Fit 2 | 3 | From: uc006@freenet.victoria.bc.ca (Michael Bakunin) 4 | 5 | Calgary Herald Feb 2/94 6 | 7 | Cyber-Nazis baffle German police 8 | 9 | BERLIN(AP)- A year-old computer network has become 10 | the communications backbone of Germany's Nazi scene, with users 11 | sharing ideas on how to rid the country of foreigners, co-ordinate 12 | illegal rallies and swap bomb-making recipes. 13 | The Thule Network, guarded by passwords and loyalty tests, 14 | consists of at least a dozen bulletin boards in three western German 15 | states, law-enforcement officials and computer experts said. It is 16 | used by Nazis to avoid detection by police. 17 | The network's name derives from a German secret society 18 | that included many leading Nazis among its membership. 
19 | With the network's aid, some 500 neo-Nazis formed a convoy 20 | that drove into the city of Fulda and rallied unhindered last year. 21 | But the Thule Network is much more than a place to look for 22 | rides to rallies. 23 | Suppose some young Nazis want to put out a newspaper, for example, 24 | but lack the know-how. Just plug into Resistance, one of the 25 | network's bulletin boards. "A network-connected attorney can 26 | check the text, a graphics office can put together the newspaper," 27 | the Resistance host says in a digital preamble. 28 | The network is also a refuge- where a crowd closely watched by 29 | police can disappear into cyberspace. Technologically ahead of most 30 | police, network gatekeepers are having considerable success keeping 31 | out the law. 32 | Not a single one has been prosecuted. 33 | "German police don't know much about computers and bulletin 34 | boards. It's very new for them," said Uwe Kauss, editor of the 35 | Munich-based computer magazine Chip, which has penetrated the network 36 | through informants. 37 | Chip estimates 1,500 of Germany's more than 40,000 Nazis are 38 | active on the network. Along with mobile phones and answering machines, 39 | the Thule Network is helping a diverse Nazi scene establish a united 40 | front - a phenomenon acknowledged by Germany's government. 41 | 42 | -- 43 | Autonomedia 44 | 618-620 View St. 45 | Victoria, BC V8W 1J6 46 | Canada email: uc006@freenet.victoria.bc.ca 47 | -------------------------------------------------------------------------------- /test data/delorean.txt: -------------------------------------------------------------------------------- 1 | 2 | The De Lorean Case 3 | 4 | ...typed for you by The Cruiser 5 | 6 | -------------------------------------------------------------------------------- 7 | 8 | De Lorean case will go to jury 9 | 10 | 11 | Former sports car maker John Z. 
De Lorean's fraud and racketeering trial 12 | moved with the speed of a Model-T Ford down a rutted country lane until 13 | his lawyer last week called a single witness and rested the defense. 14 | 15 | After closing arguments, scheduled for today in Detroit, U.S. District 16 | Judge Julian A. Cook Jr. was to order jurors to decide whether the former 17 | General Motors Corp. vice president illegally siphoned $8.5 million of 18 | investors' money from his sports car company before it failed. 19 | 20 | The only defense witness called was an FBI agent who sat at the prosecutors' 21 | table throughout the trial. De Lorean's lawyer, Howard Weitzman, grilled 22 | agent Richard Eggleston for three days last week, his questioning punctuated 23 | by numerous objections from prosecutors. 24 | 25 | The government had spent five weeks presenting its case, including 27 26 | witnesses and 4,000 pages of documents. 27 | 28 | De Lorean, a former star executive at GM who parlayed life in the fast lane 29 | into a car company all his own, did not testify. A 15-count federal indictment 30 | last year charged De Lorean, 61, with stealing the $8.5 million from 31 | De Lorean Research Limited Partnership. 32 | 33 | ... 34 | . . 35 | ... 36 | 37 | 38 | Thursday, December 18, 1986 39 | 40 | De Lorean found innocent 41 | 42 | Jury clears maker of defunct sports car of racketeering, fraud 43 | 44 | DETROIT - Former carmaker John De Lorean, accused of stealing millions of 45 | dollars from investors in his defunct sports car business, was found innocent 46 | yesterday of all charges of racketeering and fraud. 47 | 48 | On the sixth day of jury deliberations, the former General Motors executive 49 | proclaimed "Praise God" and embraced his defense lawyers when the verdict was 50 | read in the U.S. District Court. 51 | 52 | De Lorean, 61, in 1984 was acquitted of cocaine conspiracy charges after a 53 | sensational trial in Los Angeles. 
He had been accused of stealing at least 54 | $8.5 million from 140 investors in his car company, which briefly built 55 | gull-winged cars in Northern Ireland. 56 | De Lorean, who has said he intended to announce a new car venture if cleared, 57 | shook his head in relief and wiped tears from his eyes as the jury foreman 58 | responded "not guilty" for 15 findings. 59 | The silver-haired De Lorean, who was indicted in September 1985 on charges 60 | that he stole investors' money through an elaborate scheme involving money 61 | transfers between European and U.S. banks and corporations, was defended by 62 | flamboyant Los Angeles lawyer Howard Wietzman, who won the businessman's 63 | acquittal on the drug charges. 64 | 65 | Yesterday's verdict would appear to end the governmant's criminal case 66 | against him, but De Lorean is still fighting claims by creditors, including 67 | the British government. 68 | 69 | London and other creditors have frozen about $20 million of De Lorean's 70 | assets in bankruptcy proceedings in Detroit, which are expected to resume 71 | Jan. 21. 72 | 73 | As in the Los Angeles trial, De Lorean was found innocent without taking the 74 | stand in his own defense. 75 | 76 | In the 10-week trial, the government presented a stream of former De Lorean 77 | associates from the United States and Europe. The government hoped to support 78 | its contention that De Lorean masterminded an elaborate scheme to syphon 79 | investors' money under the guise of a purported loan that the prosecution said 80 | was a fraud. 81 | 82 | De Lorean's defense argued that the $8.5 million in question was recieved 83 | as part of a legitimate loan. Weitzman said the loan came from Colin Chapman, 84 | the deceased founder of the British auto group Lotus, which did engineering on 85 | the De Lorean car. 86 | 87 | Weitzman told jurors in his closing argument eight days ago that the 88 | government has persecuted his client. 
89 | 90 | Had he been convicted on all counts, De Lorean would have faced maximum 91 | sentences of up to 87 years in prison and fines of up to $82,000. 92 | 93 | -------------------------------------------------------------------------------- /test data/emmanuel.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/emmanuel.txt -------------------------------------------------------------------------------- /test data/fadehack.txt: -------------------------------------------------------------------------------- 1 | R 2 | 3 | 24/125: The Fading Hacker Mystique 4 | Name: Mac??? #95 @5211 5 | Date: Sat Aug 24 22:06:32 1991 6 | From: Blitzkrieg (Louisville, Ky.) 7 | 8 | 9 | [FORMATTED FOR 80 COLUMNS 10 | AND MIXED CASE] 11 | 12 | The Fading Hacker Mystique: No Longer Just Pests 13 | 14 | CORPORATIONS TURNING UP THE HEAT ON HI-TECH INTERLOPERS 15 | 16 | The blurred line between computer hacking and computer crime has never been 17 | more unclear. With few precedents to guide them, criminologists, legal 18 | scholars, and computer users are scrambling to define the boundaries of 19 | acceptable PC hacking. 20 | That's not an easy job, but a resolution of the issue has never been more 21 | pressing--the prosecution of hackers charged with illegally infiltrating 22 | private data banks is on the upswing. 23 | Increasingly, federal authorities are pursuing hacker related crimes, 24 | according to New York State Police Senior Investigator Don Delaney. 25 | "[Hacking] is causing companies to lose a lot of money," said Delaney, who 26 | made his comments at a recent conference on computer privacy hosted by New 27 | York University. 
28 | The hacker mystique doesn't play at all well in corporate America, where 29 | MIS managers take dim views of outsiders busting into their data networks, and 30 | they often treat security breaches as crimes. 31 | The most famous case occurred a couple of years ago, when Robert Morris 32 | Jr., the son of a famous computer scientist, was put on trial and convicted 33 | after a rogue virus he invented paralyzed a nationwide computer network. 34 | More recently, a 17-year-old New York student was charged with a 35 | misdemeanor for cracking the code in Sprint's extensive UNIX networks. The 36 | apprentice hacker, who goes by the code name "Phiber Optic," maintains that he 37 | turned to hacking because his school didn't cover the computer subjects he was 38 | interested in. Even now, he still believes that private computer networks 39 | remain fair game. 40 | Intellectually curious or hi-tech felons? 41 | Hackers are rarely "just playing," Delaney said. Often, they've been 42 | involved in crimes such as stealing credit cards or accessing toll-free phone 43 | numbers. 44 | Ultimately, the answer will be colored by what side of the debate you 45 | take, but if Delaney's position reflects a wider hard-line attitude, hackers 46 | should take note. 47 | "Every company has the rihgt to have its computer networks left alone," he 48 | said. 49 | 50 | --[ Taken from the August, 1991 issue of Computer Shopper on page 128. ]-- 51 | _______________________________________________________________________________ 52 | 53 | Re-typed and uploaded by MAC??? of The NATO Association. 54 | 55 | - End of File - 56 | 57 | 58 | 59 | 60 | Read:(1-125,^24),? 
: -------------------------------------------------------------------------------- /test data/geraldo.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/geraldo.txt -------------------------------------------------------------------------------- /test data/hack-nsa.txt: -------------------------------------------------------------------------------- 1 | Hackers retaliate by leaking manual 2 | 3 | By Keay Davidson` 4 | EXAMINER SCIENCE WRITER` 5 | 6 | Computer hackers waging what they say is a war against government 7 | electronic snooping have distributed over international computer 8 | networks a copy of the supersecret U.S. National Security Agency's 9 | employee manual. The NSA manual, which was sent to dozens of news 10 | organizations Tuesday, was distributed ""to embarrass the NSA'' and 11 | prove that even the U.S. government's most covert agency can't keep 12 | documents secret, said Grady Ward, a software designer from Arcata in 13 | Humboldt County. Ward said the document initially appeared on a 14 | Texas-based hackers network late last week, and he helped 15 | redistribute it over other electronic networks. ""The intent is to 16 | embarrass the NSA and demonstrate that even their own security manual 17 | can be distributed,'' Ward said. NSA officials said the document 18 | is an unclassified employee handbook. Anyone seeking a copy of it 19 | would need to file a Freedom of Information Act request, said NSA 20 | spokeswoman Judi Emmel. The identity of the person who initially 21 | obtained the document and how it was obtained was not clear. The 22 | manual warns employees to safeguard the document: ""While you may 23 | take this handbook home for further study, remember that is does 24 | contain "FOR OFFICIAL USE ONLY' information which should be 25 | protected. ... 
Appropriate administrative action will be taken to 26 | determine responsibility and to apply corrective and/or disciplinary 27 | measures in cases of unauthorized disclosure. . 28 | 29 | The document describes the basic goals and responsibilities of NSA 30 | employees. Among the document's instructions: ""Should strangers 31 | or casual acquaintances question you about your place of employment, 32 | an appropriate reply would be that you work for the Department of 33 | Defense. If questioned further as to where you are employed within 34 | the Department of Defense, you may reply, "NSA.' When you inform 35 | someone that you work for NSA (or the Department of Defense) you may 36 | expect that the next question will be, "What do you do?' ""It is a 37 | good idea to anticipate this question and to formulate an appropriate 38 | answer. Do not act mysteriously about your employment, as that would 39 | only succeed in drawing more attention to yourself. .x 40 | 41 | ""If you are employed as a linguist, you may say that you are a 42 | linguist, if necessary. However, you should not indicate the specific 43 | language(s) with which you are involved.'' Ward told The Examiner he 44 | obtained the document from an electronic computer ""magazine'' 45 | published in Texas. The magazine is accessible over the Internet, the 46 | global data network that any computer user can access with a 47 | computer, telephone and modem. Ward and others who have distributed 48 | the document on electronic networks did so to undermine a federal 49 | push for data-encryption regulations that would let the government 50 | tap into computer networks, Ward said. In recent months, the Clinton 51 | administration has pushed for future information systems to use a 52 | microchip called the ""Clipper Chip,'' which would allow the 53 | government, after receiving a court order, to tap any electronic 54 | transmission. 
They government said the chip would allow it to monitor 55 | criminal, espionage and terrorist activities. The plan is vehemently 56 | opposed by personal computer users and groups such as the Electronic 57 | Frontier Foundation, which fears the Clipper Chip could lead to 58 | government snooping on private citizens' legitimate activities and 59 | private lives.` 60 | -------------------------------------------------------------------------------- /test data/hacker2.txt: -------------------------------------------------------------------------------- 1 | 2 | San Jose Mercury News, Thursday morning, October 5, 1989 3 | "COPS GOT HIS NUMBER" 4 | "Teen accused of harassment calls via computer" 5 | 6 | The 16-year-old computer enthusiast loved to call people on the telephone 7 | and speaking through a voice synthesizer, talked like the devil. Over 8 | time, the boy harassed police departments from Cedar Rapids, Iowa, to 9 | Hayward, and managed to rack up an estimated $170,000 in illegal calls, 10 | authorities said. In one day last year, the Los Angeles-area youth, whom 11 | police would not identify by his real name, made 67 calls to the Hayward 12 | Police Department. And in a Los Angeles County town, dispatchers were 13 | distracted durning several emergencies one evening while they answered his 14 | crank calls for five hours. But try as they might, neither law 15 | enforcement officials nor an investigator at one telephone company that 16 | was paying the bills could figure out who was doing it. "He was, to be 17 | frank about it, a pain in the ass," said Stan McClurg, a Cedar Rapids 18 | detective." "He would call people and simply harass them." Then in 19 | early March, Kent, as he liked to call himself, spent five hours chatting 20 | with dispatchers at the Hayward Police Department. It was long enough 21 | for authorities to trace the call all the way to his home in San Gabriel. 
22 | When police arrived that night, they found the teen-ager in bed with his 23 | Commodore 64, talking on the telephone. Police say they can prove the 24 | youth made about $2000 worth of fraudulent calls. When he is arraigned 25 | Oct, 16 he will be charged with fraud and making two harrassing phone 26 | calls, four bomb threats and interfering with the Hayward Police and Los 27 | Angeles Sheriff's Departments. Sgt. Kammer of the Los Angeles Sheriffs 28 | Department said the boy probably got the long-distance calling codes from 29 | a computer bulletin board, along with numbers that would get him into the 30 | phone mail systems of large companies. such as Sears Roebuck. Once 31 | inside, he ran thousands of combinations of numbers through his computer, 32 | leaving it on all day while he was away at school, police said. When he 33 | hit on codes that would get him into employees phone mail, he would 34 | retrieve their messages and leave devilish ones of his own. "The kid 35 | was very, very bright kid, and he was very bored," Kammer said. "He sat 36 | up all night doing these things." 37 | -------------------------------------------------------------------------------- /test data/hydebomb.txt: -------------------------------------------------------------------------------- 1 | 2 August 1982, Newsweek Magazine, pp. 31-32: 2 | 3 | "The plumed Blues and Royals were a noble spectacle as they rode their 4 | black mounts at a walk through London's Hyde Park. But as the 5 | cavalrymen marched past a parked sedan, a bomb hidden inside exploded. 6 | Windows shattered for blocks, flames burst high into the air and nails 7 | wrapped around the explosive shot out like bullets. Horses fell in a 8 | writhing mass, dying soldiers bled into the tatters of their ornate 9 | uniforms and a woman passer-by, her face shredded, screamed, 'Help me! 10 | Help me!' 
11 | "Less than two hours later, on a bandstand in Regent's Park, the band 12 | of the Royal Green Jackets was giving a concert. As it played a 13 | medley from the musical 'Oliver!' a bomb hidden beneath the stage 14 | exploded -- and the entire bandstand erupted. 'Everything seemed to 15 | come up from the bottom of the bandstand and flew right into the air 16 | - -- the bodies, the instruments, everything,' said Ronald Benjamin, a 17 | member of the audience. 'A leg came within 5 feet of me.' 18 | "In one bloody day, the Irish Republican Army once again brought its 19 | war against the English to England. Even by IRA standards, last 20 | week's carnage was extraordinary: the blast at Hyde Park killed 4 21 | cavalrymen and injured 22 guards and civilians; in Regent's Park, 6 22 | bandsmen died and 24 musicians and 4 civilians were injured... 23 | "The terrorists planned their attack carefully. They parked a blue 24 | Morris sedan on Carriage Road, about 600 yards along the cavalry's 25 | daily route. Hidden inside was up to 10 pounds of gelignite explosive 26 | wrapped with hundreds of 4- and 6-inch nails. As the Blues and Royals 27 | passed, an IRA member, probably hidden in the trees of Hyde Park, 28 | punched a remote-controlled detonator... 29 | "The bomb [in Regent's Park] was planted under the floorboards on the 30 | stage. It was similar to the earlier bomb, but apparently worked on a 31 | timing device instead of remote control..." 
32 | -------------------------------------------------------------------------------- /test data/isdn.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/isdn.txt -------------------------------------------------------------------------------- /test data/jilted.txt: -------------------------------------------------------------------------------- 1 | Jilted businessman accused in botched murder plot 2 | 3 | DOYLESTOWN, Pa. (UPI) _ They tried to kill him with a crossbow. 4 | And a gun. And a bomb, a truck and a Molotov cocktail. 5 | 6 | Police say all five attempts by hitmen to kill Edward Coulter 7 | failed, but the botched attempts led them to their prime suspect, 8 | Philadelphia businessman Alfons Kessler, 47. 9 | 10 | Kessler was charged Friday with conspiracy, attempted murder and 11 | related charges for allegedly trying to arrange the death of 12 | Coulter _ who is married to Catherine Coulter, Kessler's former 13 | lover of 10 years. 14 | 15 | Police unraveled the case when one of the accused hitmen was 16 | badly hurt last week by a pipe bomb he was trying to plant at 17 | Coulter's home. 18 | 19 | ``I'm not so sure they failed so miserably,'' Assistant Bucks 20 | County District Attorney Dale Reichley said Friday. ``I wouldn't 21 | call them a bunch of misfits ... this was a powerful bomb.'' 22 | 23 | Kessler was Catherine Coulter's boss as general manager of C.A. 24 | Spaulding Co., a tool company. Mrs. Coulter was the firm's 25 | controller. 26 | 27 | Prosecutors said Kessler allegedly hatched the murder plot after 28 | Catherine Coulter last year broke off a 10-year affair with him. 29 | Reichley said Catherine Coulter has not been charged in the case, 30 | and the investigation is continuing. 
31 | 32 | Between September and February, Coulter was shot at with a 33 | crossbow and a gun, was nearly run off the road by a truck and 34 | someone tossed a Molotov cocktail into his dining room. 35 | 36 | Coulter was unaware why he was being targeted, and police had 37 | been providing him with escorts to and from work. 38 | 39 | One of the accused hit men, Wesley Rankin, 44, was arrested Feb. 40 | 27 after the pipe bomb he was trying to plant in the garage of 41 | Coulter's Bensalem home went off prematurely, blowing off his right 42 | hand and forearm. 43 | 44 | Police said Kessler paid Rankin $2,500 for the bombing, and an 45 | undisclosed sum of money for the other attempts. 46 | 47 | Authorities are still looking for the second accused hit man, 48 | Dwyaine Farley, 23, of Philadelphia. 49 | 50 | 51 | X-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-X 52 | Another file downloaded from: The NIRVANAnet(tm) Seven 53 | 54 | & the Temple of the Screaming Electron Taipan Enigma 510/935-5845 55 | Burn This Flag Zardoz 408/363-9766 56 | realitycheck Poindexter Fortran 510/527-1662 57 | Lies Unlimited Mick Freen 801/278-2699 58 | The New Dork Sublime Biffnix 415/864-DORK 59 | The Shrine Rif Raf 206/794-6674 60 | Planet Mirth Simon Jester 510/786-6560 61 | 62 | "Raw Data for Raw Nerves" 63 | X-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-X 64 | -------------------------------------------------------------------------------- /test data/johnmedi.txt: -------------------------------------------------------------------------------- 1 | 2 | "HACKER'S STORY: Two days on long distance" 3 | 4 | 5 | SOURCE: Toronto Computes! June '91 6 | ENTERED BY: Anarchy Tech 7 | 8 | [ This is a typical example of someone that "became" (Hahahahaha) a "hacker" 9 | (as he calls himself) for all the wrong reasons, and turned into an even lower 10 | life-form when he was caught. 
It is this kind of moron that gives the REAL 11 | hackers a bad name in society. ] -AT 12 | 13 | **NOTE** entries surrounded by square brackets "[]" are injection by me (AT). 14 | 15 | John Medica has been cured [Hahahaha] of his addiction to hacking. He was 16 | cured by a police bust and a theft conviction that landed him two years' 17 | probation and 100 hours of community service. 18 | Now the 22 year old Toronto man [mouse] wants to warn companies that 19 | hackers are out to get them. [ What a CROCK this is! Since when was HACKING 20 | (in it's most purest form) truely destructive!? ] And he wants to warn hackers 21 | that they can't always evade detection. [No shit, sherlock.] 22 | 23 | "If you get into it you're taking a risk. I thought I might get away with 24 | murder. If they don't listen then it's not my fault. Down the road they'll 25 | be in the same situation as me." 26 | He says many companies are very vulnerable to hacking. 27 | "There's a problem out there. I know a lot of people hacking on UNIX." 28 | [whistle, whistle] UNIX is a high-end operating system used by many government 29 | departments. 30 | "Down the road you could see federal systems going down," Medica warns. 31 | Medica says he became a hacker for fun and for braggin rights on computer 32 | bulletin boards. 33 | "You feel you have to try it. It's just like WarGames." [Ah Ha! There it is!] 34 | Once he'd broken into a system, he'd spread the news to his computer buddies 35 | connected to a special bulletin board for hackers. He says participating on 36 | the bulletin board was just like hanging out with a crowd. "It was competition, 37 | braggin." 38 | Last year Medica and some computer friends aquireda Bell Canada credit card 39 | from a hacker bulletin board. They used the number to embark on a long-distance 40 | phone binge that lasted two days. [Well, THERE'S A SMART MOVE.] They phoned 41 | British Columbia, California, London, and Sidney, Australia. 
42 | The call to Sydney lasted two hours and included ordering an Australian pizza 43 | from Pizza Hut. Calls to Brian Mulroney's office in Ottawa and the KGB in 44 | Moscow were unsuccessful. 45 | Medica says he also got access codes for a local company called Video 1, 46 | allowing him to call commercial party lines on the 976 exchange at the 47 | company's expense. He says that unlike other hackers, he never sold the access 48 | codes. 49 | "I wasn't into it that seriously." [Obviously, you got caught, didn't you.] 50 | The access codes were available to all participants of the hackers boards, he 51 | says. He found out about the hacker board through pirate boards which 52 | distribute illegally obtained software. He learned about pirate boards through 53 | friends. There are about 100 pirate boards in the Toronto area, says Medica. 54 | 55 | [ 100 eh? Hmmm, I wonder if that's as high as he can count? He forgot the 56 | other 300+ that he couldn't get on. 57 | 58 | BUT! I am being very critical. Personally, it could all be a front for the 59 | press. It's what I might do.. Actually, I wouldn't talk to the press at all. 60 | 61 | If this is not a front, which is highly likely, then this guy must be crazy. 62 | Using codes that were made public on a Hacker (or any other type of) BBS is 63 | totally insane. Never distribute your codes to anyone but your immediate 64 | group members (or friends) and don't accept codes from anyone but them. 65 | 66 | I stay away from codes all-together. They are easy to trace, and you don't 67 | learn anything from using them. Hack your own codes, and crack your own 68 | systems. Don't abuse someone else's efforts. 
] 69 | 70 | -------------------------------------------------------------------------------- /test data/legion.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/legion.txt -------------------------------------------------------------------------------- /test data/marsface.txt: -------------------------------------------------------------------------------- 1 | 2 | July 8, 1988 3 | SOME SEE GLIMPSE OF LIFE IN 'FACE' OF MARS 4 | AP and UPI 5 | 6 | WASHINGTON - Unusual formations on the surface of Mars - including a 7 | mile-long rock shaped like a human face - may have been carved by a lost 8 | civilization, four scientists said yesterday. 9 | The scientists, including a former astronaut, said at a news conference 10 | that the chances are better than 50-50 that the structures were made by 11 | intelligent beings. 12 | The scientists said that a photograph taken of the Martian surface in 13 | 1976 by NASA's Viking spacecraft clearly shows a face that could have been 14 | carved out of a Martian mountain a half-million years ago. 15 | The sphinx-like image that stares outward from the planet may be part of 16 | a complex of buildings, as evidenced by other unusual formations nearby, the 17 | scientists said. 18 | 19 | Brian T. O'Leary, a former astronaut and an expert on Mars, said there 20 | is sufficient uncertainty about the origin of the rock face that it should 21 | be a major target for future spacecraft sent to Mars. 22 | O'Leary said last January he asked Soviet space scientists who were 23 | preparing to send probes to Mars to examine the area where the face appears. 24 | He said the Soviets were interested, but replied that their spacecraft was 25 | not technically designed to study the Cydonia region of Mars, where the 26 | sight is located. 
27 | The Soviets launched a probe toward Mars yesterday and plan to launch a 28 | second one later this month. 29 | The news conference yesterday was prompted by a recent study of the 30 | Viking photographs conducted by Mark Carlotto, an optical engineering 31 | expert. 32 | 33 | In an article published in Applied Optics, Carlotto said that a computer 34 | enhancement of the Viking photographs shows that the face and various other 35 | nearby features appear to have been carved by "intelligent design." 36 | Yesterday, Carlotto told reporters that a sophisticated statistical study of 37 | the shapes clearly shows that "the face is not natural." 38 | Richard Hoagland, founder of a private organization of scientists called 39 | "The Mars Project," said that in addition to the face there is "a complex of 40 | unusual objects" at the Cydonia site. The complex includes a five-sided 41 | mountain that resembles a pyramid and a massif he believes could have been 42 | part of an astronomical marker. 43 | 44 | Hoagland said that a line drawn from the center of the city, across the 45 | face to the massif, or cliff, would line up exactly with the Sun at the 46 | moment of Mars' summer solstice, as it would have occured 500,000 years ago 47 | - an alignment it is extremely unlikely could occur naturally. 48 | 49 | 50 | (Source: The San Diego Union - July 8, 1988) 51 | 52 |  -------------------------------------------------------------------------------- /test data/marsmani.txt: -------------------------------------------------------------------------------- 1 | 13-Feb-88 01:53 MST 2 | Sb: APn 02/03 1227 Mars Mania 3 | Copyright, 1988. The Associated Press. All rights reserved. 4 | 5 | By RICHARD COLE Associated Press Writer 6 | MIAMI (AP) -- This year, Mars makes its closest approach to Earth in a 7 | generation, and astronomers say the red planet's appearance in the night sky 8 | may be the astronomical event of 1988. 
9 | As if to demonstrate the heavens have a sense of humor, the height of the 10 | show comes in September, just shy of the 50th anniversary of Orson Welles' 1938 11 | "War of the Worlds" broadcast that panicked the nation with fictional reports 12 | of invaders from Mars. 13 | "I see 1988 as a great Martian adventure," said Jack Horkheimer, the aptly 14 | named "Star Hustler" of the Public Broadcasting System and executive director 15 | of Miami's Space Transit Planetarium. "I'm like a kid in a candy store." 16 | When Mars is at its closest on Sept. 21 -- just over 36 million miles away 17 | -- it will rival Jupiter as the brightest object in the sky after the Moon and 18 | Venus. 19 | "It won't be this close again until 2003," says Horkheimer. "And there are a 20 | lot of kids out there who have never seen it this bright." 21 | Unlike the comets Halley and Kohoutek, Mars will not disappoint viewers, 22 | because its brightness is more predictable and the planet will be easily 23 | visible from almost everywhere. 24 | He expects a spate of UFO sightings to accompany Mars' visit as people 25 | unaccustomed to the unblinking reddish-orange light in the night sky mistake it 26 | for more exotic extraterrestrial visitors. 27 | Like planetarium directors around the nation, Horkheimer is preparing a 28 | series of Mars shows he promises will "knock your socks off." 29 | One involves a snazzy computerized simulation of a flight through the 30 | planet's gigantic 2,500-mile-long version of the Grand Canyon. Another centers 31 | around an 18-mile-high mountain -- three times higher than Mount Everest -- 32 | capped by a crater the size of Georgia. 33 | He also is bringing to the planetarium a new telescope nicknamed "Awesome 34 | Orson" in honor of the late Welles' broadcast and girth. 35 | Although Earth passes Mars every two years, it is only every 15 to 17 years 36 | that the orbits of the third and fourth planets bring them as close together as 37 | in 1988. 
The year began with Earth and Mars separated by 200 million miles. 38 | The close encounter comes four years before a scheduled Mars probe by the 39 | Soviet Union. 40 | The Soviet plan to have the probes bring back Martian soil, and perhaps, 41 | Horkheimer said, settle the most intriguing question about Earth's neighbor -- 42 | whether life once existed on the now cold and arid desert planet. 43 | U.S. Mars landers in the 1970s tried to answer that question, but the 44 | chemical soil test results beamed back to Earth were inconclusive. 45 | "I really hope that they find fossilized signs of life," he said. "We know 46 | that there was water on Mars." 47 | Horkheimer said he also hopes the Soviet probes will spur the United States 48 | to revive its own space program, with a manned landing on Mars. 49 | "We could be on Mars easily within a decade or so," he said. "The technology 50 | already exisits. What is missing is the money." And the funds will be available 51 | only when the public once again supports spending the billions of dollars in 52 | funding a Martian landing would require. 53 | If the political situation allows it, a joint U.S.-Soviet mission to Mars 54 | could ease the financial burden on both countries, he said. 55 | It also could promote peace between the two rivals -- an ironic benefit from 56 | a planet named after the Roman god of war. 57 | 58 | 59 | 60 |  -------------------------------------------------------------------------------- /test data/mism6.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | ------------------------------------------------------------ 4 | Computer Hackers Beware! 5 | ------------------------------------------------------------ 6 | Senate Passes Computer Fraud And Abuse Act 7 | ------------------------------------------------------------ 8 | 9 | 10 | 11 | The Senate October 2 unanimously passed the Computer 12 | Fraud and Abuse Act of 1986. The bill, s. 
2281, imposes 13 | fines of up to $500,000 and/or prison terms of up to 20 14 | years for breaking into government or financial 15 | institutions' computers. 16 | 17 | The Federal Government alone operates more than 18,000 18 | medium-scale and large-scale computers at some 4,500 19 | different sites. The Office of Technology Assessment 20 | estimates the government's investment in computers over the 21 | past four years at roughly $60 million. The General Services 22 | Administration estimates that there will be 250,000 to 23 | 500,000 computers in use by the Federal Government by 1990. 24 | 25 | In 1984, legislators' attention to and concern about 26 | computer fraud was heightened by a report by the American 27 | Bar Association task force on computer crime. According to 28 | the report, based on a survey of 1,000 private organizations 29 | and public agencies, percent of the 283 respondents had been 30 | victimized by some form of computer crime, and more than 25 31 | percent had sustained financial losses totaling between an 32 | estimated $145 million and $730 million during one twelve- 33 | month period. 34 | 35 | To address this problem, the Senate and House enacted, 36 | in 1984, the first computer statute (18 U.S.C. 1030). Early 37 | this year both the House and Senate introduced legislation 38 | to expand and amend this statute. 39 | 40 | In the current bill, which is expected to be signed by 41 | President Reagan next week, penalties will be imposed on 42 | anyone who knowingly or intentionally accesses a computer 43 | without authorization, or exceeds authorized access and: 44 | 45 | (1) Obtains from government computers information 46 | relating to national defense and foreign relations. 47 | 48 | (2) Obtains information contained in financial records 49 | of financial institutions.
50 | 51 | (3) Affects the use of the government's operation of a 52 | computer in any department or agency of the 53 | government that is exclusively for the use of the 54 | U.S. Government. 55 | 56 | (4) Obtains anything of value, unless the object of the 57 | fraud and the thing obtained consists only of the 58 | use of the computer. 59 | 60 | (5) Alters, Damages, or Destroys Information in any 61 | federal interest computer, or prevents authorized 62 | use of any such computer or information. 63 | 64 | Under the bill, a person would be guilty of computer 65 | fraud if he or she causes a loss of $1,000 or more during 66 | any one year period. 67 | 68 | Depending on the offense, penalties include fines up to 69 | $100,000 for a misdemeanor, $250,000 for a felony, $500,000 70 | if the crime is committed by an organization, and prison 71 | terms of up to 20 years. 72 | 73 | The bill also prohibits traffic in passwords and other 74 | information from computers used for interstate or foreign 75 | commerce. This part of the bill makes it possible for 76 | Federal Prosecutors to crack down on Pirate Bulletin Boards 77 | and similar operations because the bill covers business 78 | computers, online networks, and online news and information 79 | services, all of which are considered interstate commerce. 80 | 81 | 82 | Downloaded from P-80 Systems.... -------------------------------------------------------------------------------- /test data/mitnick.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/mitnick.txt -------------------------------------------------------------------------------- /test data/neidorfd.txt: -------------------------------------------------------------------------------- 1 | 2 | CRAIG NEIDORF DEFENSE FUND 3 | 4 | Most of you know about the PHRACK trial and Craig Neidorf, the 5 | publisher of PHRACK. 
Craig put his neck on the line to provide 6 | timely, interesting, and helpful information to the phreak/hacker 7 | community. The government decided to put a stop to that flow of 8 | information so last summer they arrested Craig for publishing a 9 | PHRACK article on Bell South's Emergency 911 system, an article 10 | which contained a document which was illegally downloaded from Bell 11 | South's computer system. 12 | 13 | The government's case fell apart after it was revealed that the 14 | document, originally valued at $79,449 by Bell South, was 15 | obtainable by dialing an 800 number and paying $13. This, and the 16 | fact that Neidorf was not the one who accessed Bell South's 17 | computer, caused the government to drop the charges and set Craig 18 | Neidorf free. 19 | 20 | Good news, right? Well it is and it isn't. While the government 21 | lost its case, Craig Neidorf will no longer be publishing PHRACK, 22 | so the government still wins. The fact is that although Craig won 23 | in court, he must still pay his own attorney's fees. His original 24 | bill reached over $200,000! The law firm that he had retained found 25 | ways to reduce $100,000 off of that amount, but Craig and his 26 | family have still paid $35,000 to one firm, $8,000 to another, and 27 | have roughly $65,000 left to pay off. THIS IS AFTER "WINNING" IN 28 | COURT! 29 | 30 | No, he cannot sue the government for this money. He has to pay it 31 | himself. A lot of people seem to think that the Electronic Frontier 32 | Foundation (EFF) is going to help him with his legal bills. No way. 33 | The EFF does not want to be perceived as a "hacker defense fund" so 34 | they are trying to distance themselves from the case. Their only 35 | help in the matter was to pay for court motions filed by the EFF's 36 | law firm on Neidorf's behalf concerning the First Amendment. THE 37 | EFF HAS NO PLANS AT THIS TIME TO HELP PAY NEIDORF'S ATTORNEY FEES.
38 | 39 | What this means is that Craig Neidorf, after being harassed and 40 | arrested by the government for PUBLISHING A MAGAZINE, will shut his 41 | magazine down and pay approximately $100,000 in attorney fees EVEN 42 | THOUGH HE WON HIS COURT BATTLE. 43 | 44 | If you're as outraged by this as I am, I want you to know that you 45 | can do something about it. You can show Neidorf how much you valued 46 | PHRACK and how outraged you are about what happened to him by 47 | sending money to the Neidorf Defense Fund. Every cent that you send 48 | will be used to defray his attorney fees. The address is: 49 | 50 | NEIDORF DEFENSE FUND 51 | Attn: Sheldon Zenner 52 | Katten, Muchin & Zavis 53 | 525 West Monroe Street #1600 54 | Chicago, Illinois 60606-3693 55 | 56 | Checks *MUST* be payable to "Katten, Muchin & Zavis" and have 57 | "Neidorf Defense Fund" written in the memo field. 58 | 59 | You can help even more by downloading this message and uploading it 60 | to as many other BBSes as you possibly can. It's available on 61 | &TOTSE, 415/935-5845, as NEIDORFD.ZIP. Also, if you're a caller on 62 | &TOTSE and you send Neidorf a check for $25 or more, send me a 63 | photocopy of the cancelled check and I'll give you 250 file 64 | transfer credits. &TOTSE's mailing address is: Jeff Hunter, & the 65 | Temple of the Screaming Electron, P.O. Box 5378, Walnut Creek, CA, 66 | 94596.
67 | 68 | -------------------------------------------------------------------------------- /test data/nsw-porn.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/nsw-porn.txt -------------------------------------------------------------------------------- /test data/pbust.txt: -------------------------------------------------------------------------------- 1 | PORTUGUESE PIRATES BUSTED BY POLICE 2 | 3 | 4 | 5 | Two young guys were busted last friday by the portuguese police. 6 | 7 | The investigation ended after a week. The police done domiciliary inves- 8 | tigations in four houses and informatic material was captured by them, that 9 | the experts investigated. 10 | 11 | The two guys are acused of carding, entering in a european network, 12 | and capturing credit cards and then, and magnetizing the credit cards, 13 | with one machine that they carded from the usa. The method they used was 14 | deleting their credit cards and introducing the victims informations, that 15 | they got from the european networks. 16 | 17 | The phraud was discovered in the initial phase when a envolved 10/20 of 18 | citizens that noticed strange debits in their bancary accounts. 19 | 20 | Those young guys were the first to be busted by carding in Portugal. 21 | 22 | They are acused too of steel and then sell confidential information. 23 | 24 | The informatic 'experts' of the police investigated the computers, but 25 | those guys teached them how to use their engine, and maybe something else. 26 | 27 | The police says that millions of escudos were phrauded. 28 | 29 | Signed: Dr.Kaos & Traderfox 30 | 31 | PS: This text is true, you can consult Diario De Noticias of 14th of July. 
32 | 33 | -------------------------------------------------------------------------------- /test data/phrack.txt: -------------------------------------------------------------------------------- 1 | 2 | MISSOURI MAN PLEADS NOT GUILTY TO 3 | CHARGES OF CRACKING 911 SYSTEM 4 | 5 | (Feb. 17) 6 | A 19-year-old University of Missouri 7 | has pleaded not guilty to federal 8 | charges he invaded the 911 emergency 9 | phone network for nine states, then 10 | passed along stolen information in an 11 | electronic publication. 12 | Craig Neidorf was indicted earlier 13 | this month along with Robert J. Riggs, 14 | 20, of Decatur, Ga. They are charged 15 | with interstate transportation of stolen 16 | property, wire fraud and violations of 17 | the federal Computer Fraud and Abuse Act 18 | of 1986. 19 | As reported earlier, prosecutors 20 | allege the two used computers to enter 21 | the 911 system of Atlanta's Bell South 22 | and copied the program that controls and 23 | maintains the system. The stolen 24 | material then allegedly was published on 25 | a computer bulletin board system 26 | operating in the Chicago suburb of 27 | Lockport. Authorities contend Neidorf 28 | edited the data for an electronic 29 | publication known as "Phrack." 30 | Associated Press writer Sarah Nordgren 31 | reports that at a hearing Thursday, 32 | assistant US Attorney William Cook was 33 | granted a motion to prevent the 911 34 | program from becoming part of the public 35 | record during the trial. US District 36 | Judge Nicholas Bua set April 16 as the 37 | trial date. 38 | The 911 system in question controls 39 | emergency calls to police, fire, 40 | ambulance and emergency services in 41 | cities in Alabama, Mississippi, Georgia, 42 | Tennessee, Kentucky, Louisiana, North 43 | Carolina, South Carolina and Florida. 
44 | 45 | Downloaded From P-80 Systems 304-744-2253 46 | -------------------------------------------------------------------------------- /test data/press.txt: -------------------------------------------------------------------------------- 1 | 2 | How To Talk To The Press by Johnny Mnemonic 3 | Excerpt from Computer underground Digest 4.45 (23-Sep-92) 4 | 5 | ((It was rumored that, immediately after his appearance on 6 | an NBC news show, that John (Cap'n Crunch) Draper was 7 | released from his job for reasons of fiscal expediency. 8 | Whether true or not, this seems like a good time to reprint 9 | Mike Godwin's advice on "How to Talk to the Press" for those 10 | who are in the rolodexes of media folk)). 11 | 12 | This is a file I posted to an Austin BBS back when I gave the SJG 13 | story to the local papers. 14 | 15 | 104: Talking to Media, part 1 16 | By: Johnny Mnemonic [54] 17 | Date: 11:07 3/18/90 18 | 19 | As I've promised on another message base, here's the beginning of 20 | discussion of how to bring stories to the media. 21 | 22 | Since I keep thinking of different things people ought to know about 23 | how to take a story to the media, I'm going to make this a multi-post 24 | discussion. 25 | 26 | 1) TRY TO THINK LIKE THE REPORTER YOU'RE TALKING TO. 27 | 28 | One of the things that happens when people know about an event or 29 | series of events that may make a good news story is that they assume 30 | the importance of the story will be obvious to anyone. 31 | 32 | Sometimes this is true (when the tipster knows about a murder, for 33 | example). Often it's not. 34 | 35 | So, when I tell a reporter about a story I think she should want to 36 | cover, I make sure to stress the aspects of the story that are likely 37 | to interest that reporter and/or the readers of her publication. 
For 38 | example, when I spoke to Kyle Pope about the Illuminati seizure, I 39 | stressed the following: 40 | 41 | a) Steve Jackson Games is an Austin business that may end up being 42 | damaged by the seizure. 43 | 44 | b) Nobody has given this story anything like major coverage in the 45 | national media, or (so far as I knew) in other geographic areas. (I 46 | was telling him he had a major "scoop" opportunity.) 47 | 48 | c) There are some very dramatic aspects to this story. (I told him 49 | about the 20-year-old LoD member who woke up on the morning of March 1 50 | with a gun pointed at him by a Secret Service agent.) 51 | 52 | 2) IF YOU'RE GOING TO MEET THE REPORTER IN PERSON, TRY TO BRING 53 | SOMETHING ON PAPER. 54 | 55 | There are lots of good reasons to follow this rule: 56 | 57 | a) Believe it or not, but people take stuff on paper a little more 58 | seriously than the spoken word. It's nice to give the reporter 59 | something that lends substance to what you're saying, even if the 60 | substance is printouts from your own computer. 61 | 62 | b) It makes life easier for the reporter, who doesn't have to write 63 | down every single thing you tell her. Reporters like to have materials 64 | they can use for reference as they research and write their stories. 65 | 66 | c) It helps you remember to say everything you want to say. Nothing is 67 | more frustrating than trying to get a reporter interested in your 68 | story, getting inconclusive results, and then realizing later that you 69 | should have told the reporter about something. (E.g., "Damn! I forgot 70 | to tell him what 'cyberpunk' means, so he won't know how the federal 71 | agents misinterpreted the manual.") 72 | 73 | When I went to the Statesman, I took edited printouts of discussions 74 | from Flight, from SMOF, and from comp.dcom.telecom on Usenet. I also 75 | took some private Email I had received, with the names of the senders 76 | deleted. 
And I took my copy of the WHOLE EARTH REVIEW with the article 77 | on Usenet. My object was to convey to him the scale of concern about 78 | the seizures, plus give him enough background to be able to ask 79 | reasonably informed questions of the people he talked to. 80 | 81 | 3) GIVE THE REPORTER OTHER PEOPLE TO TALK TO, IF POSSIBLE. 82 | 83 | Two basic justifications for this rule: First, it'll help your 84 | credibility (especially if you don't already know the reporter 85 | personally). Second, multiple sources or witnesses usually enable the 86 | reporter to filter out what is mere opinion or speculation from what 87 | everybody actually knows for a fact. 88 | 89 | 4) DON'T ASSUME THAT THE REPORTER WILL COVER THE STORY THE WAY YOU'D 90 | LIKE HER TO. 91 | 92 | Reporters' accuracy and focus in a story are constrained by several 93 | factors: 94 | 95 | a) The amount of available time. Reporters have to be quick studies, 96 | and often have to assimilate a complex story in a hurry. This 97 | necessarily increases the risk of inaccuracy in a story, and gives you 98 | an even greater reason to follow Rules 1 through 3. 99 | 100 | 2) The reporters' obligation to be fair. This means they have to talk 101 | to people on the other side of the issues from you. This in turn means 102 | that you're unlikely to get a story that represents or promotes your 103 | point of view at the expense of those who oppose you. 104 | 105 | 106 | 107 | ------------------------------ 108 | -/Vuarnet International/- 109 | 617/527.oo91 110 | 24oo-16.8k HST/V32bis 111 | -------------------------------------------------------------------------------- /test data/pump.txt: -------------------------------------------------------------------------------- 1 | (NEWS)(GOVERNMENT)(NYC)(00001) 2 | 3 | Computer Access Arrests In NY 11/03/92 4 | GREENBURGH, NEW YORK, U.S.A., 1992 NOV 3 (NB) -- The 5 | Greenburgh, New York Police Department has announced the arrest of 6 | three individuals, Randy P. 
Sigman, 40; Ronald G. Pinz, Jr, 21; and Byron 7 | J. Woodard, 18 for the alleged crimes of Unauthorized Use Of A computer 8 | and Attempted Computer Trespass, both misdemeanors. Also arrested was 9 | Jason A. Brittain, 22 in satisfaction of a State of Arizona Fugitive From 10 | Justice warrant. 11 | 12 | The arrests took place in the midst of an "OctoberCon" or "PumpCon" 13 | party billed as a "hacker get-together" at the Marriott Courtyard Hotel in 14 | Greenburgh. . The arrests were made at approximately 4:00 AM on 15 | Sunday morning, November 1st. The three defendants arrested for 16 | computer crimes were granted $1,000 bail and will be arraigned on 17 | Friday, November 6th. 18 | 19 | Newsbytes sources said that the get together, which had attracted up to 20 | sixty people, had dwindled to approximately twenty-five when, at 10:00 21 | Saturday night, the police, in response to noise complaints arrived and 22 | allegedly found computers in use accessing systems over telephone lines. 23 | The police held the twenty-five for questioning and called in Westchester 24 | County Assistant District Attorney Kenneth Citarella, a prosecutor versed 25 | in computer crime, for assistance. During the questioning period, the 26 | information on Brittain as a fugitive from Arizona was obtained and at 27 | 4:00 the three alleged criminal trespassers and Brittain were charged. 28 | 29 | Both Lt. DeCarlo of the Greenburgh police and Citarella told Newsbytes 30 | that the investigation is continuing and that no further information is 31 | available at this time. 32 | 33 | (Barbara E. McMullen & John F. McMullen/19921103) 34 | -------------------------------------------------------------------------------- /test data/r&e_bust.txt: -------------------------------------------------------------------------------- 1 | .FBI raids major Ohio computer bulletin board; action follows joint 2 | investigation with SPA 3 | 4 | The Federation Bureau of Investigation on Saturday, Jan. 
30, 1993, raided 5 | "Rusty & Edie's," a computer bulletin board located in Boardman, Ohio, 6 | which has allegedly been illegally distributing copyrighted software 7 | programs. Seized in the raid on the Rusty & Edie's bulletin board were 8 | computers, hard disk drives and telecommunications equipment, as well as 9 | financial and subscriber records. For the past several months, the Software 10 | Publishers Association ("SPA") has been working with the FBI in 11 | investigating the Rusty & Edie's bulletin board, and as part of that 12 | investigation has downloaded numerous copyrighted business and 13 | entertainment programs from the board. 14 | 15 | The SPA investigation was initiated following the receipt of complaints 16 | from a number of SPA members that their software was being illegally 17 | distributed on the Rusty & Edie's BBS. The Rusty & Edie's bulletin board 18 | was one of the largest private bulletin boards in the country. It had 124 19 | nodes available to callers and over 14,000 subscribers throughout the 20 | United States and several foreign countries. To date, the board has logged 21 | in excess of 3.4 million phone calls, with new calls coming in at the rate 22 | of over 4,000 per day. It was established in 1987 and had expanded to 23 | include over 19 gigabytes of storage housing over 100,000 files available 24 | to subscribers for downloading. It had paid subscribers throughout the 25 | United States and several foreign countries, including Canada, Luxembourg, 26 | France, Germany, Finland, the Netherlands, Spain, Sweden and the United 27 | Kingdom. 28 | 29 | A computer bulletin board allows personal computer users to access a host 30 | computer by a modem-equipped telephone to exchange information, including 31 | messages, files, and computer programs. 
The systems operator (Sysop) is 32 | generally responsible for the operation of the bulletin board and 33 | determines who is allowed to access the bulletin board and under what 34 | conditions. For a fee of $89.00 per year, subscribers to the Rusty & Edie's 35 | bulletin board were given access to the board's contents including many 36 | popular copyrighted business and entertainment packages. Subscribers could 37 | "download" or receive these files for use on their own computers without 38 | having to pay the copyright owner anything for them. 39 | 40 | "The SPA applauds the FBI's action today," said Ilene Rosenthal, general 41 | counsel for the SPA. "This shows that the FBI recognizes the harm that 42 | theft of intellectual property causes to one of the U.S.'s most vibrant 43 | industries. It clearly demonstrates a trend that the government understands 44 | the seriousness of software piracy." The SPA is actively working with the 45 | FBI in the investigation of computer bulletin boards, and similar raids on 46 | other boards are expected shortly. Whether it's copied from a program 47 | purchased at a neighborhood computer store or downloaded from a bulletin 48 | board thousands of miles away, pirated software adds to the cost of 49 | computing. According to the SPA, in 1991, the software industry lost $1.2 50 | billion in the U.S. alone. Losses internationally are several billion 51 | dollars more. 52 | 53 | "Many people may not realize that software pirates cause prices to be 54 | higher, in part, to make up for publisher losses from piracy," says Ken 55 | Wasch, executive director of the SPA. "In addition, they ruin the 56 | reputation of the hundreds of legitimate bulletin boards that serve an 57 | important function for computer users." The Software Publishers Association 58 | is the principal trade association of the personal computer software 59 | industry.
It's over 1,000 members represent the leading publishers in the 60 | business, consumer and education software markets. The SPA has offices in 61 | Washington DC, and Paris, France. 62 | 63 | 64 | CONTACT: Software Publishers Association, Washington 65 | Ilene Rosenthal, 202/452-1600 Ext. 318 66 | Terri Childs, 202/452-1600 Ext. 320 67 | 68 | -------------------------------------------------------------------------------- /test data/radhoax.txt: -------------------------------------------------------------------------------- 1 | Disc jockeys who were suspended by a radio station 2 | for concocting an elaborate on-air murder confession apologized to listeners 3 | Friday. 4 | 5 | Morning radio personalities Kevin Ryder, Gene ``Bean'' Baxter and former 6 | Arizona radio personality Doug Robert - who now works at the Southern 7 | California station - were suspended for the incident at KROQ-FM. The duration 8 | of the suspension wasn't disclosed. 9 | 10 | Ryder and Baxter were heard on a pre-recorded apology aired Friday. 11 | 12 | The disc jockeys were in a meeting with KROQ executives Friday afternoon and 13 | couldn't be reached. A receptionist who refused to give her name said none of 14 | the executives was available. 15 | 16 | Officials at the parent company, New York City-based Infinity Broadcasting 17 | likewise were unavailable, said a secretary for Mel Karmazian, Infinity 18 | president and chief executive officer. 19 | 20 | The trio was suspended a week after KROQ acknowledged the stunt to the Los 21 | Angeles County Sheriff's Department, which spent 10 months investigating 22 | hundreds of called-in leads. 23 | 24 | In Washington, the head of the Federal Communications Commission's 25 | enforcement division said Friday that similar hoaxes have led to FCC license 26 | revocations. The KROQ incident was being investigated by the FCC. 27 | 28 | ``We are evaluating what action, if any, we should take,'' said Charles E. 
29 | Kelley, adding the station ``could lose its license for this type of incident 30 | if it was judged by the commission that the incident showed an abdication or a 31 | loss of control by the station.'' 32 | 33 | The Sheriff's Department plans to bill the Burbank station for time spent 34 | investigating last June's hoax, said sheriff's spokesman Bob Stoneman. 35 | 36 | ``I don't even want to guess'' the amount of the bill, Stoneman said. 37 | 38 | KROQ officials were considering further disciplinary action against the disc 39 | jockeys based upon listeners' responses, the Los Angeles Times reported. 40 | 41 | On Thursday, the station was deluged with hundreds of calls from angry 42 | listeners and at least one advertiser was threatening to pull its ads. 43 | 44 | The bogus confession was made June 13 during a ``Confess Your Crime'' skit 45 | 46 | in which Ryder and Baxter encouraged listeners to call in and reveal their 47 | transgressions on the air. 48 | 49 | The caller, who refused to give his name, said: 50 | 51 | ``I heard you guys talking. I really need to tell somebody about this. 52 | 53 | ``I had this girlfriend for like about six years and we were right on the 54 | verge of getting married and all of this stuff. And I came home and caught her 55 | with somebody ... a good friend of mine, as a matter of fact.'' 56 | 57 | After some prodding by the disc jockeys, the caller admitted that he had 58 | badly beaten his girlfriend: 59 | 60 | One disc jockey asked: ``Is there a chance, seriously, that you killed 61 | her?'' 62 | 63 | The caller responded: ``Yeah, I know I did.'' 64 | 65 | More than 60 calls and faxes flooded the station shortly after the 66 | confession aired. Hundreds more phoned authorities after the local news 67 | agencies and the TV series, ``Unsolved Mysteries'' reported the confession. 
68 | 69 | Among the most persistent callers were a mother and father in Northern 70 | California who are trying to solve the mystery of their daughter's death. 71 | 72 | 73 | 74 | 75 | 76 | X-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-X 77 | Another file downloaded from: The NIRVANAnet(tm) Seven 78 | 79 | & the Temple of the Screaming Electron Taipan Enigma 510/935-5845 80 | Burn This Flag Zardoz 408/363-9766 81 | realitycheck Poindexter Fortran 510/527-1662 82 | Lies Unlimited Mick Freen 801/278-2699 83 | The New Dork Sublime Biffnix 415/864-DORK 84 | The Shrine Rif Raf 206/794-6674 85 | Planet Mirth Simon Jester 510/786-6560 86 | 87 | "Raw Data for Raw Nerves" 88 | X-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-X 89 | -------------------------------------------------------------------------------- /test data/requiem.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/requiem.txt -------------------------------------------------------------------------------- /test data/saddam.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/saddam.txt -------------------------------------------------------------------------------- /test data/satlink.txt: -------------------------------------------------------------------------------- 1 | 2 | Historians and politicians continue to debate who 3 | won the Gulf War, but anyone glued round-the- 4 | clock to their TV set will tell you the v-inner vvas 5 | CNN. 
With its live, raw, and riveting coverage, Ted 6 | Turner's 24-hour Cable News Network became the 7 | information source for an international audience that 8 | included everyone from competing news organizations 9 | to Saddam Hussein. 10 | For many of its live visual reports, CNN relied on one 11 | of its four mobile satellite communications systems, 12 | also called transportable earth stations, or "flyaways." 13 | The flyaways accept signals from standard video cam- 14 | eras and include video and audio processing equip- 15 | ment, a dish antenna for transmitting the signals via 16 | satellite, and amplifiers for powering the system. 17 | Mobile satellite technology has been in use at CNN 18 | 19 | since 1984, according to Dick Tauber, 20 | director of satellites and circuits for 21 | CNN. But the recent development of 22 | more compact earth stations, with 23 | smaller antennas that broadcast on the 24 | higher-frequency Ku-band rather than 25 | C-band, has increased the systems' 26 | mobility and has cut the amount of 27 | time it takes news crews to set up and 28 | link up with a satellite. What once took 29 | nearly a day now takes a few hours, 30 | he said. 31 | "The C-band transportable earth sta- 32 | tions took up a lot of cargo space - you 33 | needed a large truck, an 18-wheeler, 34 | 35 | because of the great big antenna," Tauber said. 36 | With Ku-band flyaway systems, antenna sizes have 37 | shrunk to about six to eight feet, compared with the 38 | more than 100 feet needed for C-band communications. 39 | The 13 components of the S-l Flyaway weigh less than 40 | 100 pounds each and fit into crates approved by the 41 | Federal Aviation Administration. 42 | FAA approval is a major selling point for the technol- 43 | ogy - instead of sending the flyaways as freight using 44 | cargo services, "we can ship the whole thing as excess 45 | baggage on a commercial flight," Tauber said. 
"Now 46 | everything hits the ground at the same time: crew, 47 | reporters and a dozen or two boxes." 48 | The flyaways sell for between $200,000 to $340,000 49 | and are approved by the Washington, D.C.-based 50 | International Telecommunications Organization 51 | 52 | (Intelsat), a 25-year-old cooperative of more than 120 53 | member countries who own and operate a global com- 54 | munications satellite system. "It's the concept of the 55 | global village," said Arnold Meyers, manager of broad- 56 | cast services for Intelsat. "If anything happens in a 57 | country, there's more interest in seeing pictures live." 58 | During the Gulf War and the tense period preceding 59 | it, CNN found itself competing with news organizations 60 | that also had deployed earth stations for live broadcasts. 61 | It had to move quickly to contract for satellite access 62 | time with the Iraqi, Kuwaiti and Saudi Arabian govern- 63 | ment ministries responsible for handling Intelsat com- 64 | munications services - a logistical nightmare Tauber 65 | described in one word: "Maalox." 66 | Today, three of CNN's flyaways are based in Atlanta 67 | and a fourth is stationed in London. "Before the war, 68 | 69 | there were 30 to 40 licensed transportable flyaways 70 | around the world," Tauber said. "But by the time the 71 | war was over, there were 130." 72 | Along with the flyaways, CNN has been issuing 73 | portable satellite phones to its news teams, Tauber said. 74 | With these phones, CNN reporters and crews in the 75 | field are virtually guaranteed an open channel of com- 76 | munication with more than 15 foreign bureaus and 77 | CNN's Atlanta headquarters. 78 | But no matter how successful its satellite communi- 79 | cations solutions have been, CNN continues to explore 80 | even better ways to provide live coverage from news 81 | scenes around the vvorld. 
Tauber's group is watching 82 | the development of new digital video technologies, 83 | including video compression, that will make it possible 84 | to send live video over the telephone. 85 | One of the most promising of these technologies is 86 | Motion Picture Experts Group (MPEG), a video and 87 | audio compression standard expected to be finalized 88 | later this year. C-Cube Microsystems, a San Jose, Calif.- 89 | based compression systems developer, and Bell Atlantic 90 | have already demonstrated a prototype MPEG-based 91 | system capable of transmitting high-quality video from 92 | a central video file server to subscriber homes via stan- 93 | dard copper telephone lines. 94 | Tauber believes it will be three to five years before 95 | digital video technology will be cost-effective and 96 | capable of handling the netv--ork's broadcast-quality 97 | requirements. But he's not worried about the wait. 98 | "Time flies when you're doing bench tests," he 99 | said with a laugh. - - - 100 | -------------------------------------------------------------------------------- /test data/silvrspy.txt: -------------------------------------------------------------------------------- 1 | W 2 | 3 | 4 | ==)--- P TO PAUSE S TO STOP ---(== 5 | 6 |  7 | 8 | HERE IS AN ARTICLE ON ONE ,OF P-80'S MEMBERS WHO MANAGED TO 9 | DIG UP A LITTLE PUBLICITY. SILVER SPY. FROM THE U.S. NEWS AND 10 | WORLD REPORT (JUNE 3 85) 11 | 12 | WORLD OF SPY,17 YR OLD TINKER 13 | 14 | Silver Spy has everything going for him-comfortable surroundings, a father who is an engineer. He ranks in the top 3 percent of his high-school class. His SAT scores for college admission totaled 1,400 of a possible 1,600. He wants to attend Stanford or the Massachusetts Institute of Technology. But in the eyes of the phone companies he is a thief, and in the eyes of the law he's a criminal. Such is the portrait of this 17-year-old computer "hacker" and phone "phreaker who lives about 20 miles outside Boston. 
He spoke with U.S.News & World Report on the condition that neither his real name nor hometown be revealed. Catch 22. Silver Spy runs one of the most exclusive bullentin boards in the nation. Called Catch 22, it will have no more than 20 users, whose credentials will be carefully checked. Silver Spy, who owns two personal computers, wants to keep the user list small so that his board is not infiltrated by authorities. He terminated Spy Master, an earlier system, because of messages he received that threatened him with arrest. He believes they came from FBI agents. The youth says he has hacked, or tinkered, in a number of university and commercial computers, including some belonging to banks. But he maintains that he has never damaged a computer or compromised private information. "I'm out to learn as much as I can about a system and not do any damage," he says. "I know people who delete [destroy] files when they get into a commercial computer. There are little 12-year-olds who are so ecstatic about getting into a computer that they fill all its memory with the words,"Hacker was here." Phone phreaking-making toll calls that are billed to other users-is another story: "Since most of the people and boards I talk to are out ot state, I generally don't pay for the phone calls. Some people could say it's immoral. I know it's illegal." Then how does he justify calls for which others are asked to pay? "I don't," replies Silver Spy. 15 | 16 | DOWNLOADED FROM P-80 SYSTEMS.... 17 | 18 |  19 | 20 | 21 | 22 | LIBRARY MENU #3 23 | 24 | 25 | 26 | 27 | Fone Scrambling 28 | Stepline Fun 29 | Crashing NW/GBBS 30 | Wiretapping 31 | Tone Signa -------------------------------------------------------------------------------- /test data/sunhack.txt: -------------------------------------------------------------------------------- 1 | |----------------------------------------------------------------------------| 2 | | This phile was typed by DreadWolf... 
And like doesn't take no | 3 | | responsibity, for shit... So like give the Coven a kall... | 4 | | The Coven | 5 | | 360-297-4049 | 6 | |----------------------------------------------------------------------------| 7 | 8 | ****************************************************************************** 9 | HACKERS SUSPECTED IN ONLINE VANDALISM 10 | By Ken Alltucker 11 | ****************************************************************************** 12 | 13 | Two local Internet providers have been vandalized recently and they 14 | suspect it might be the work of computer hackers. 15 | Linknet, the online service provided by Kitsap Regional Library, 16 | has been shut down twice this week because of an online vandal. 17 | "We have been down two nights because of problems," said Michael 18 | Schuyler, director of Linknet. "We think we have it squared away at this 19 | point." 20 | Another Internet provider, Telebyte Northwest, was targeted by a 21 | vandal. It had soda sprayed across the windows in its office in Silverdale, 22 | said employee Al Wallace. 23 | A few months back, Telebyte's phone lines were severed, shutting 24 | down the system its operation for one day. 25 | Many area businesses depend on Telebyte for online communication, 26 | Wallace said. 27 | Linknet service stopped after someone sent a program that ate away 28 | the computer's memory. 29 | Schuyler said he has made changes in the system and doesn't expect 30 | problems to continue. 31 | The two linknet attacks halted the system for 12 hours Monday and 32 | Tuesday night. 33 | Telebyte has also had some online troubles, Wallace said. A "modem 34 | bomb" was recently sent to computer users, causeing several computers to 35 | freeze. 36 | Wallace said it only affected computer users who were online when it 37 | was sent. The bomb causes no permanent damage to computer; rather, users 38 | just have to restart the computers, Wallace said. 
39 | Telebyte was the first Kitsap County commercial Internet provider 40 | when it started last October. 41 | Linknet is a service provided at no charge through Kitsap Regional 42 | Library. It allows users to send and receive electronic mail and access 43 | parts of the Internet. 44 | The demand for Linknet accounts has mushroomed since it started 45 | last January, Schuyler said. 46 | It received a Library Services and Construction Grant two months ago 47 | allowing it to double its call-in lines. 48 | Schuyler said the new lines should be available this summer. 49 | -------------------------------------------------------------------------------- /test data/swpiracy.txt: -------------------------------------------------------------------------------- 1 | Software Piracy - NZListener 13-8-90 2 | 3 | Author: Chris Barton 4 | NZ Listener, August 13 1990. 5 | 6 | The copy command for IBM PCs and compatibles is seductively simple. 7 | You type: copy *.* a: You say: copy space star-dot-star space ay colon. 8 | People think the gibberish you're muttering indicates brain damage from too 9 | much exposure to low-level radiation from your computer screen. Wrong. Your 10 | computerspeak command to your PC simply translates to: copy a whole bunch of 11 | files onto the magnetic disk I've just placed in the disk drive named "a". 12 | 13 | This language is based on the principle of "your wish is my command", but get 14 | the command right. Its minimalist approach to communication demands precision 15 | and has no time for nuance, ambiguity or shades of meaning. Spell incorrectly 16 | or get a colon or a space out of place and the PC will come back with a curt 17 | error message like "Bad command or file name" and then sit steadfastly silent, 18 | oblivious to your curses, until you try again. 19 | 20 | The computer is also amoral. Its language has no words to warn you about the 21 | consequences of your actions. 
For example, it doesn't think of telling you that 22 | when you use the commands like copy *.* or diskcopy a: b: (diskcopy space ay 23 | colon space bee colon) that you could be committing theft. Or that, depending on 24 | whether they prosecute you under the Crimes Act or the Copyright Act, you could 25 | be facing imprisonment for up to 10 years, fines or a civil action for 26 | substantial damages. The PC is indifferent to the fact that in Australia fines 27 | up to $250,000 have been imposed. 28 | 29 | No one has been successfully prosecuted for illegally copying software in New 30 | Zealand yet. But if we believe what the software vendors tell us, the country 31 | is riddled with software thieves. They say up to 50 percent of all business 32 | software products such as wordprocessors and spreadsheets in use in New Zealand 33 | are illegal or "pirated" copies. And the recently formed New Zealand Business 34 | Software Association (NZBSA) - an alliance of software vendors - claims in the 35 | coming year "an estimated $80 million revenue will be forfeited by the New 36 | Zealand information industry alone as a result of widespread unauthorised 37 | copying and related illegal practices". 38 | 39 | So, if software piracy is rampant, why hasn't anyone been prosecuted? Well, it 40 | could be because software copyright has never been fully tested in New Zealand 41 | courts, although a judgment by Justice Smellie in favour of IBM in a copyright 42 | case against Computer Imports in March 1989 went some way towards setting a 43 | precedent. The problem is the New Zealand Copyright Act came into existence in 44 | 1962 when the computer industry was still in its infancy and contains no 45 | explicit references to computer software. This means any case will have to argue 46 | whether or not software constitutes a "literary work" as defined in the act and 47 | whether, in its machine-coded form, it is a "translation" or "adaptation" of 48 | the literary work. 
New Zealand legal minds are no doubt itching to sort this 49 | out. 50 | 51 | There are also cases of copying to other magnetic media such as video or audio 52 | cassette tape where copyright has been infringed and where prosecutions have 53 | been successful. But perhaps the real reason for little action being taken is 54 | that the copying of software is endemic and so easy to do. 55 | 56 | Software is an expensive item in any computer purchase - around $1000 each for 57 | well-known brands of database, wordprocessor or spreadsheet - and the ease with 58 | which you can copy these on to blank disks costing around $5 is very tempting, 59 | even if you don't get the software manual and the benefit of software upgrades. 60 | Some argue software vendors should look to a technological solution like the 61 | special chip included in the new digital audio tape (DAT) recorders being sold 62 | in the US which prevent making tape copies from compact disc players. 63 | 64 | But it's really a case of trying to shut the stable door after the horse has 65 | bolted. Although copy protection techniques have been used in some software, 66 | they've never been very successful and tend to cause problems in everyday use. 67 | And the vast quantity of existing pirated software makes the task even more 68 | difficult. Which is why the NZBSA is adopting an educate rather than prosecute 69 | approach. The main targets are educational institutions and large businesses 70 | where software is "shared" among the students or employees. And from time to 71 | time these sorts of institutions undergo a major purge of pirated software as 72 | was seen recently at Auckland Technical Institute. 73 | 74 | The NZBSA is also about to publish its "Software Compliance Manual for Decision 75 | Makers" which gives guidelines on the whole issue of copying software and its 76 | prevention. 
77 | 78 | While no one expects the NZBSA to start breaking down doors and seizing personal 79 | computers, it's clear a prosecution test case would show they mean business. In 80 | the meantime expect to see warning notices appear around your place of work. The 81 | NZBSA example reads: 82 | 83 | WARNING 84 | Making unauthorised copies of software is a criminal act and can 85 | expose you to instant dismissal, substantial fines, claims for 86 | civil damages and even a jail term. Don't jeopardise your career. 87 | Use only original software. 88 | 89 | 90 |  -------------------------------------------------------------------------------- /test data/teensac.txt: -------------------------------------------------------------------------------- 1 | TWO TEENS ACCUSED OF CRACKING PHONES -- WHILE IN THE JAILHOUSE 2 | 3 | (Dec. 1) 4 | Two teen-agers in jail in San Jose, Calif., on computer cracking charges have 5 | lost their jailhouse phone privileges. That's because authorities say the boys 6 | used a jail phone to make illegal collect calls. 7 | 8 | Police told United Press International they believe the two -- Jonathan 9 | Yaantis, 18, and Michael Torrell, 19, both believed to be from Skagit County, 10 | Wash. -- made as many as three illegal calls from the county jail. 11 | 12 | UPI says the calls were made to a phone "bridge," or illegal conference-call 13 | network used by phone "phreakers," and billed to an unauthorized number in 14 | Virginia. 15 | 16 | "The first of the calls was made just two days after they were arrested," 17 | said Sgt. Dave Flory of the San Jose Police Department's high technology 18 | crime unit. 19 | 20 | Yaantis and Michael Torrell were arrested Nov. 2 by a San Jose police officer 21 | who spotted them at a phone booth near a convenience store. He said they were 22 | operating a laptop computer attached by wires with alligator clips to the phone 23 | wires. 
Police said insulation had been stripped from the phone wires to allow 24 | the connection. 25 | 26 | Allegedly, one or both of the boys subsequently made calls from the jail to 27 | the cracker network on Nov. 6 and 7, Flory said. He added, "Their telephone 28 | privileges were cut off because we didn't want to be accessories, since they 29 | are in our custody." 30 | 31 | The wire service says the pair is charged with several felonies, including 32 | damaging the phone company's line, theft and illegal use of phone card charge 33 | numbers and possession of a device to avoid phone charges. 34 | -------------------------------------------------------------------------------- /test data/testfile.txt: -------------------------------------------------------------------------------- 1 | foam foams roams foam foams roams foam foams roams foam foams roams foam foams roams -------------------------------------------------------------------------------- /test data/testfile2.txt: -------------------------------------------------------------------------------- 1 | apache is a great org. gfsdg hfsgfdg lucene -------------------------------------------------------------------------------- /test data/testfile3.txt: -------------------------------------------------------------------------------- 1 | networks. protocol -------------------------------------------------------------------------------- /test data/theburli.txt: -------------------------------------------------------------------------------- 1 | 2 | Peninsula Hackers Busted 3 | Uploaded by Elric of Imrryr 4 | Lunatic Labs News & Archieve Deptment 5 | 6 | Friday, July 26, 1985 7 | Millbrae police arrest alleged teen 'hackers' 8 | by John Curry - Times Staff Writer 9 | MILLBRAE -- A telephoned bomb threat to the White House early this summer led to 10 | the arrest Wednesday (7224) of two teen-age "computer hackers" in Millbrae, 11 | police report. 
12 | The youths, 17 and 14, allegedly had hacked into access lines by computer and 13 | made phone calls all over the world. So far, the tab to Pacific Bell and MCI is 14 | over $10,000, police Sgt. Ron Caine said. 15 | The investigation began with the hacked phone call to the White House, 16 | leading federal authorities there to ask American Telephone and Telegraph (AT&T) 17 | to back-trace the call. When it was determined where the call came from, the 18 | lines were monitored and evidence of phone fraud came out, Caine said. 19 | "They apparently have some very sophisticated equipment in Washington 20 | and they can do things like that," Caine observed. 21 | The White House call was traced to a teleconference system that included as 22 | many as 59 illegal users, investigators reported. 23 | "They were just shocked that we had any knowledge of it," Detective Ray 24 | Celeste added. "The parents, of course, did not have any idea what their kids 25 | were doing." 26 | Police seized the youths' computers, an illegal hacking program and a 27 | homemade "blue box" that can simulate the tones made by puch-button telephone 28 | users, Celeste said. 29 | The object was to hack into someone's access code by computer, with the calls 30 | then to be billed to that someone else's number, Caine said. The 1.5 month 31 | investigation by federal and AT&T officials resulted in a 24-page search 32 | warrant documenting every call the boys allegedly had made up to that point, he 33 | added. The total could increase because there may have been other calls made 34 | this week since the search warrant was prepared, Caine went on. 35 | The hacking program was used to try up to 3,000 phone numbers and access codes 36 | per day to see what numbers could be abused, Celeste said. 37 | The boys were arrested at their homes and released back to their parents 38 | pending juvenile court action, Caine said. 
They could face charges of 39 | computer crime and telephone fraud, the officers said. Caine noted also that the 40 | parents could be held liable for the costs of the phone calls. 41 | This was not the Peninsula's first experience with alleged high-school 42 | hackers. In December 1984, police followed up on a tip that three 43 | Burlingame youths had invaded the GTE Sprint computer system via a "blue box" 44 | to use it without paying toll charges. The trio's phone lines were monitored 45 | for two weeks while they ran up over $500 worth of calls to the East Coast. 46 | 47 | 48 | 49 | 50 | Downloaded from Just Say Yes. 2 lines, More than 500 files online! 51 | Full access on first call. 415-922-2008 CASFA 52 | 53 | 54 | 55 | 56 | Another file downloaded from: 57 | 58 | ! 59 | -$- & the Temple of the Screaming Electron 60 | ! * Walnut Creek, CA 61 | + /^ | 62 | ! | |//^ _^_ 2400/1200/300 baud (415) 935-5845 63 | /^ / @ | /_-_ Jeff Hunter, Sysop 64 | |@ _| @ @|- - -| 65 | | | | /^ | _ | - - - - - - - - - * 66 | |___/____|_|_|_(_)_| Aaaaaeeeeeeeeeeeeeeeeee! / 67 | 68 | Specializing in conversations, E-Mail, obscure information, 69 | entertainment, the arts, politics, futurism, thoughtful discussion, 70 | insane speculation, and wild rumours. An ALL-TEXT BBS. 71 | 72 | "Raw data for raw minds." 73 | 74 | -------------------------------------------------------------------------------- /test data/vanish.txt: -------------------------------------------------------------------------------- 1 | 2 | THE HACKER WHO VANISHED 3 | 4 | Suicide or clever hoax? 5 | 6 | Early one morning last September, a pair of California computer whizzes 7 | were doing what hackers like to do best: peering at a glowing terminal, 8 | hacking away at a programs and talking about the books and movies they might 9 | someday write about their electronic exploits. Tom Anderson, 16, paused to 10 | take a shower. 
"When I came out," recalls Anderson, who had left his friend, 11 | Bill Landreth, busily pounding away at the keyboard, "he was gone. And I 12 | haven't seen him since." 13 | 14 | Landreth's disappearance from Escondido, a suburb of San diego, is of 15 | concern to more than his family and friends. Three years ago, Landreth-- 16 | whose IQ measured 163 and who was only 19 at the time--was convicted of wire 17 | fraud after he tapped into a GTE electronic-mail service whose clients 18 | included NASA and the Defense Department. Landreth passed the access code 19 | along to other hackers; soon computer users around the country were using the 20 | GTE lines for free. But since Landreth (known to other hackers as "THE 21 | CRACKER" from the book INNER CIRCLE) hadn't altered any message or stolen any 22 | money, the judge let him off with three years' probation. Landreth later 23 | capitalized on his notoriety by writing a book about breaking through computer 24 | security screens and by advising businesses on how to protect their system 25 | against the type of intrusion that got him in trouble. But by disappearing, 26 | Landreth broke his probation terms and is now being sought by U.S. marshals. 27 | 28 | The hacker's friends doubt that he is on the lam. "It's not unusual for 29 | him to disappear," says Jenny Perkes, a former girlfriend. Indeed Anderson 30 | remembers that Landreth liked to cultivate an air of mystery and recently had 31 | taken off to Mexico and later Sweden without telling anyone, returning a week 32 | later as if he had just been to the corner store. But Landreth has never 33 | before been gone for such a long period; and this time he vanished without 34 | leaving money for rent--a discourtesy that friends say is totally out of 35 | character. "He was a gentleman, a very thoughtful person, very generous,"says 36 | Perkes. 37 | 38 | Landreth left something esle behind, however: a rambling eight-page letter 39 | that only added to the mystery. 
Landreth apparently wrote the letter, which 40 | was addressed to no one and left in his room, during a period last summer when 41 | friends say he appeared depressed. The rambling discourse touches on such 42 | topics as evolution, nuclear war, society's greed, computers becoming more 43 | important than man,the meaning of existence and, ulitmately, suicide. "The 44 | idea is that I will commit suicde sometime around my 22nd birthday ," 45 | the note said. "This will have given me 22 or so years with which to convince 46 | myself that life really isn't worth living." Landreth's 22nd birthday was 47 | Dec. 26. Anderson, for one expects never to hear from his friend again. 48 | 49 | Cryptic message: But Landreth's apparent suicide note had an escape 50 | clause. Some months after typing the suicide reference, he added the words: 51 | "Since writing the above, my plans have changed slightly. I am going to take 52 | this money I have left...and make a final attempt at making life worthy. It 53 | will be a short attempt, and I do suspect that if it works out that none of my 54 | friends will know me then. If it dosen't work out, the news of my death will 55 | probably get around (I won't try to hide it)." 56 | 57 | Does Landreth's note reflect a troubled youth contemplating suicide or an 58 | adventure-loving computer genius playing an elaborate real-life Dungeons & 59 | Dragons like game on his friends? So far, neither the police nor the federal 60 | marshals nor his friends have been able to crack the mystery of the hacker who 61 | one day disappeared leaving no trace than a computer deleted sentence. 
62 | 63 | George Hackett with Hilliard Harper in San Diego 64 | 65 | 66 | -------------------------------------------------------------------------------- /test data/vs010799.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ag-chirag/Lucene-Tutorials/272078fc8c1ffed27d2023bb603f49dbf6796b50/test data/vs010799.txt --------------------------------------------------------------------------------